blob: 1036367aae11c7cf6375263ab1fd380e0b4fa25d [file] [log] [blame]
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001#include "Python.h"
2#include "structmember.h"
3
/* Docstring text, declared under the name pickle_module_doc. */
PyDoc_STRVAR(pickle_module_doc,
"Optimized C implementation for the Python pickle module.");
6
/* Pickle protocol numbers known to this file.
   Bump these when new opcodes are added to the pickle protocol. */
enum {
    HIGHEST_PROTOCOL = 3,   /* newest protocol number defined here */
    DEFAULT_PROTOCOL = 3    /* NOTE(review): presumably the protocol used when
                               the caller does not choose one -- confirm at
                               the use site */
};
12
13
/* Pickle opcodes.  These must be kept updated with pickle.py.
   Extensive docs are in pickletools.py.

   Every opcode is a single character constant; the save_*() and memo_*()
   helpers store it as the first byte of the data they write. */
enum opcode {
    MARK            = '(',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',
    FLOAT           = 'F',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    LONG            = 'L',
    BININT2         = 'M',
    NONE            = 'N',
    PERSID          = 'P',
    BINPERSID       = 'Q',
    REDUCE          = 'R',
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',
    APPEND          = 'a',
    BUILD           = 'b',
    GLOBAL          = 'c',
    DICT            = 'd',
    EMPTY_DICT      = '}',
    APPENDS         = 'e',
    GET             = 'g',
    BINGET          = 'h',
    INST            = 'i',
    LONG_BINGET     = 'j',
    LIST            = 'l',
    EMPTY_LIST      = ']',
    OBJ             = 'o',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',
    SETITEM         = 's',
    TUPLE           = 't',
    EMPTY_TUPLE     = ')',
    SETITEMS        = 'u',
    BINFLOAT        = 'G',

    /* Protocol 2. */
    PROTO       = '\x80',
    NEWOBJ      = '\x81',
    EXT1        = '\x82',
    EXT2        = '\x83',
    EXT4        = '\x84',
    TUPLE1      = '\x85',
    TUPLE2      = '\x86',
    TUPLE3      = '\x87',
    NEWTRUE     = '\x88',
    NEWFALSE    = '\x89',
    LONG1       = '\x8a',
    LONG4       = '\x8b',

    /* Protocol 3 (Python 3.x) */
    BINBYTES       = 'B',
    SHORT_BINBYTES = 'C',
};
77
/* These aren't opcodes -- they're ways to pickle bools before protocol 2
 * so that unpicklers written before bools were introduced unpickle them
 * as ints, but unpicklers after can recognize that bools were intended.
 * Note that protocol 2 added direct ways to pickle bools.
 *
 * Any earlier definition of TRUE/FALSE is discarded first, so within this
 * file the two names expand to string literals, not integers.
 */
#undef TRUE
#define TRUE "I01\n"
#undef FALSE
#define FALSE "I00\n"
87
/* Tuning constants for the pickler. */
enum {
    /* Keep in sync with pickle.Pickler._BATCHSIZE.  This is how many elements
       batch_list/dict() pumps out before doing APPENDS/SETITEMS.  Nothing will
       break if this gets out of sync with pickle.py, but it's unclear that
       would help anything either. */
    BATCHSIZE = 1000,

    /* Nesting limit until Pickler, when running in "fast mode", starts
       checking for self-referential data-structures. */
    FAST_NESTING_LIMIT = 50,

    /* Size of the write buffer of Pickler.  Higher values will reduce the
       number of calls to the write() method of the output stream. */
    WRITE_BUF_SIZE = 256,
};
103
/* Exception classes for pickle.  These should override the ones defined in
   pickle.py, when the C-optimized Pickler and Unpickler are used. */
static PyObject *PickleError;
static PyObject *PicklingError;
static PyObject *UnpicklingError;

/* copyreg.dispatch_table, {type_object: pickling_function} */
static PyObject *dispatch_table;
/* For EXT[124] opcodes. */
/* copyreg._extension_registry, {(module_name, function_name): code} */
static PyObject *extension_registry;
/* copyreg._inverted_registry, {code: (module_name, function_name)} */
static PyObject *inverted_registry;
/* copyreg._extension_cache, {code: object} */
static PyObject *extension_cache;

/* XXX: Are these really necessary? */
/* As the name says, an empty tuple. */
static PyObject *empty_tuple;
/* For looking up name pairs in copyreg._extension_registry. */
static PyObject *two_tuple;
125
126static int
127stack_underflow(void)
128{
129 PyErr_SetString(UnpicklingError, "unpickling stack underflow");
130 return -1;
131}
132
/* Internal data type used as the unpickling stack.  The stack owns one
   reference to each object stored in data[0 .. length-1]. */
typedef struct {
    PyObject_HEAD
    int length;         /* number of initial slots in data currently used */
    int size;           /* number of slots in data allocated */
    PyObject **data;    /* slot array, allocated with PyMem_Malloc/Realloc */
} Pdata;
140
141static void
142Pdata_dealloc(Pdata *self)
143{
144 int i;
145 PyObject **p;
146
147 for (i = self->length, p = self->data; --i >= 0; p++) {
148 Py_DECREF(*p);
149 }
150 if (self->data)
151 PyMem_Free(self->data);
152 PyObject_Del(self);
153}
154
/* Type object for Pdata.  Only the name, the instance size and the
   destructor are filled in; every remaining slot is left zero-initialized. */
static PyTypeObject Pdata_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Pdata",              /*tp_name*/
    sizeof(Pdata),                /*tp_basicsize*/
    0,                            /*tp_itemsize*/
    (destructor)Pdata_dealloc,    /*tp_dealloc*/
};
162
163static PyObject *
164Pdata_New(void)
165{
166 Pdata *self;
167
168 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
169 return NULL;
170 self->size = 8;
171 self->length = 0;
172 self->data = PyMem_Malloc(self->size * sizeof(PyObject *));
173 if (self->data)
174 return (PyObject *)self;
175 Py_DECREF(self);
176 return PyErr_NoMemory();
177}
178
179
180/* Retain only the initial clearto items. If clearto >= the current
181 * number of items, this is a (non-erroneous) NOP.
182 */
183static int
184Pdata_clear(Pdata *self, int clearto)
185{
186 int i;
187 PyObject **p;
188
189 if (clearto < 0)
190 return stack_underflow();
191 if (clearto >= self->length)
192 return 0;
193
194 for (i = self->length, p = self->data + clearto; --i >= clearto; p++) {
195 Py_CLEAR(*p);
196 }
197 self->length = clearto;
198
199 return 0;
200}
201
202static int
203Pdata_grow(Pdata *self)
204{
205 int bigger;
206 size_t nbytes;
207 PyObject **tmp;
208
209 bigger = (self->size << 1) + 1;
210 if (bigger <= 0) /* was 0, or new value overflows */
211 goto nomemory;
212 if ((int)(size_t)bigger != bigger)
213 goto nomemory;
214 nbytes = (size_t)bigger * sizeof(PyObject *);
215 if (nbytes / sizeof(PyObject *) != (size_t)bigger)
216 goto nomemory;
217 tmp = PyMem_Realloc(self->data, nbytes);
218 if (tmp == NULL)
219 goto nomemory;
220 self->data = tmp;
221 self->size = bigger;
222 return 0;
223
224 nomemory:
225 PyErr_NoMemory();
226 return -1;
227}
228
229/* D is a Pdata*. Pop the topmost element and store it into V, which
230 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
231 * is raised and V is set to NULL.
232 */
233static PyObject *
234Pdata_pop(Pdata *self)
235{
236 if (self->length == 0) {
237 PyErr_SetString(UnpicklingError, "bad pickle data");
238 return NULL;
239 }
240 return self->data[--(self->length)];
241}
242#define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
243
244static int
245Pdata_push(Pdata *self, PyObject *obj)
246{
247 if (self->length == self->size && Pdata_grow(self) < 0) {
248 return -1;
249 }
250 self->data[self->length++] = obj;
251 return 0;
252}
253
/* Push an object on stack, transferring its ownership to the stack.
   On failure the enclosing function returns ER.
   NOTE(review): when the push fails the object is neither stored nor
   released here; check whether callers expect that. */
#define PDATA_PUSH(D, O, ER) do {                               \
        if (Pdata_push((D), (O)) < 0) return (ER); } while(0)

/* Push an object on stack, adding a new reference to the object.
   O is evaluated exactly once.  If the push fails, the reference added here
   is released again before the enclosing function returns ER, so nothing
   is leaked. */
#define PDATA_APPEND(D, O, ER) do {                             \
        PyObject *pdata_append_obj_ = (PyObject *)(O);          \
        Py_INCREF(pdata_append_obj_);                           \
        if (Pdata_push((D), pdata_append_obj_) < 0) {           \
            Py_DECREF(pdata_append_obj_);                       \
            return (ER);                                        \
        }                                                       \
    } while(0)
262
263static PyObject *
264Pdata_poptuple(Pdata *self, Py_ssize_t start)
265{
266 PyObject *tuple;
267 Py_ssize_t len, i, j;
268
269 len = self->length - start;
270 tuple = PyTuple_New(len);
271 if (tuple == NULL)
272 return NULL;
273 for (i = start, j = 0; j < len; i++, j++)
274 PyTuple_SET_ITEM(tuple, j, self->data[i]);
275
276 self->length = start;
277 return tuple;
278}
279
280static PyObject *
281Pdata_poplist(Pdata *self, Py_ssize_t start)
282{
283 PyObject *list;
284 Py_ssize_t len, i, j;
285
286 len = self->length - start;
287 list = PyList_New(len);
288 if (list == NULL)
289 return NULL;
290 for (i = start, j = 0; j < len; i++, j++)
291 PyList_SET_ITEM(list, j, self->data[i]);
292
293 self->length = start;
294 return list;
295}
296
/* State of one Pickler instance. */
typedef struct PicklerObject {
    PyObject_HEAD
    PyObject *write;            /* write() method of the output stream */
    PyObject *memo;             /* Memo dictionary, keeps track of the seen
                                   objects to support pickling of
                                   self-referential objects. */
    PyObject *pers_func;        /* persistent_id() method, can be NULL */
    PyObject *arg;              /* Cached 1-tuple reused by ARG_TUP() and
                                   pickler_call(); may be NULL. */
    int proto;                  /* Pickle protocol number, >= 0 */
    int bin;                    /* Boolean, true if proto > 0 */
    int buf_size;               /* Size of the current buffered pickle data */
    char *write_buf;            /* Write buffer, this is to avoid calling the
                                   write() method of the output stream too
                                   often. */
    int fast;                   /* Enable fast mode if set to a true value.
                                   Fast mode disables the use of the memo,
                                   therefore speeding up the pickling process
                                   by not generating superfluous PUT opcodes.
                                   It should not be used with self-referential
                                   objects. */
    int fast_nesting;           /* Nesting depth maintained by
                                   fast_save_enter()/fast_save_leave();
                                   set to -1 on an error exit. */
    PyObject *fast_memo;        /* Dict of objects seen beyond
                                   FAST_NESTING_LIMIT; created lazily by
                                   fast_save_enter(). */
} PicklerObject;
320
/* State of one Unpickler instance. */
typedef struct UnpicklerObject {
    PyObject_HEAD
    Pdata *stack;               /* Pickle data stack, store unpickled objects. */
    PyObject *readline;         /* readline() method of the underlying
                                   stream */
    PyObject *read;             /* read() method of the underlying stream */
    PyObject *memo;             /* Memo dictionary, provide the objects stored
                                   using the PUT opcodes. */
    PyObject *arg;              /* Cached 1-tuple reused by ARG_TUP() and
                                   unpickler_call(); may be NULL. */
    PyObject *pers_func;        /* persistent_load() method, can be NULL. */
    PyObject *last_string;      /* Reference to the last bytes object returned
                                   by read() or readline(); it keeps the
                                   buffer handed out by unpickler_read() and
                                   unpickler_readline() alive. */
    char *buffer;               /* Reading buffer. */
    char *encoding;             /* Name of the encoding to be used for
                                   decoding strings pickled using Python
                                   2.x. The default value is "ASCII" */
    char *errors;               /* Name of the error handling scheme to use
                                   when decoding strings. The default value is
                                   "strict". */
    int *marks;                 /* Mark stack, used for unpickling container
                                   objects. */
    Py_ssize_t num_marks;       /* Number of marks in the mark stack. */
    Py_ssize_t marks_size;      /* Current allocated size of the mark stack. */
} UnpicklerObject;
344
/* Forward declarations: the save_*() helpers below call save_reduce() before
   it is defined. */
static int save(PicklerObject *, PyObject *, int);
static int save_reduce(PicklerObject *, PyObject *, PyObject *);
static PyTypeObject Pickler_Type;
static PyTypeObject Unpickler_Type;
350
351
/* Helpers for building the one-element argument tuple handed to callables.
   The tuple is cached in self->arg so that PyTuple_New() normally runs only
   once.

   ARG_TUP stores obj in slot 0 of the cached tuple (creating the tuple on
   first use) and takes over the reference to obj; whatever was in the slot
   before is released.  If the tuple cannot be created, obj is released and
   self->arg stays NULL. */

#define ARG_TUP(self, obj) do {                                 \
        if ((self)->arg == NULL)                                \
            (self)->arg = PyTuple_New(1);                       \
        if ((self)->arg != NULL) {                              \
            Py_XDECREF(PyTuple_GET_ITEM((self)->arg, 0));       \
            PyTuple_SET_ITEM((self)->arg, 0, (obj));            \
        }                                                       \
        else {                                                  \
            Py_DECREF((obj));                                   \
        }                                                       \
    } while (0)

/* Give up the cached tuple when somebody else still holds a reference to it
   after the call, so that it is never modified behind their back. */
#define FREE_ARG_TUP(self) do {                                 \
        if (Py_REFCNT((self)->arg) > 1)                         \
            Py_CLEAR((self)->arg);                              \
    } while (0)
369
370/* A temporary cleaner API for fast single argument function call.
371
   XXX: Does caching the argument tuple provide any real performance benefits?
373
374 A quick benchmark, on a 2.0GHz Athlon64 3200+ running Linux 2.6.24 with
375 glibc 2.7, tells me that it takes roughly 20,000,000 PyTuple_New(1) calls
376 when the tuple is retrieved from the freelist (i.e, call PyTuple_New() then
377 immediately DECREF it) and 1,200,000 calls when allocating brand new tuples
378 (i.e, call PyTuple_New() and store the returned value in an array), to save
   one second (wall clock time). Either way, the loading time of a pickle stream
380 large enough to generate this number of calls would be massively
381 overwhelmed by other factors, like I/O throughput, the GC traversal and
382 object allocation overhead. So, I really doubt these functions provide any
383 real benefits.
384
385 On the other hand, oprofile reports that pickle spends a lot of time in
386 these functions. But, that is probably more related to the function call
387 overhead, than the argument tuple allocation.
388
389 XXX: And, what is the reference behavior of these? Steal, borrow? At first
390 glance, it seems to steal the reference of 'arg' and borrow the reference
391 of 'func'.
392 */
393static PyObject *
394pickler_call(PicklerObject *self, PyObject *func, PyObject *arg)
395{
396 PyObject *result = NULL;
397
398 ARG_TUP(self, arg);
399 if (self->arg) {
400 result = PyObject_Call(func, self->arg, NULL);
401 FREE_ARG_TUP(self);
402 }
403 return result;
404}
405
406static PyObject *
407unpickler_call(UnpicklerObject *self, PyObject *func, PyObject *arg)
408{
409 PyObject *result = NULL;
410
411 ARG_TUP(self, arg);
412 if (self->arg) {
413 result = PyObject_Call(func, self->arg, NULL);
414 FREE_ARG_TUP(self);
415 }
416 return result;
417}
418
419static Py_ssize_t
420pickler_write(PicklerObject *self, const char *s, Py_ssize_t n)
421{
422 PyObject *data, *result;
423
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000424 if (self->write_buf == NULL) {
425 PyErr_SetString(PyExc_SystemError, "invalid write buffer");
426 return -1;
427 }
428
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000429 if (s == NULL) {
430 if (!(self->buf_size))
431 return 0;
432 data = PyBytes_FromStringAndSize(self->write_buf, self->buf_size);
433 if (data == NULL)
434 return -1;
435 }
436 else {
437 if (self->buf_size && (n + self->buf_size) > WRITE_BUF_SIZE) {
438 if (pickler_write(self, NULL, 0) < 0)
439 return -1;
440 }
441
442 if (n > WRITE_BUF_SIZE) {
443 if (!(data = PyBytes_FromStringAndSize(s, n)))
444 return -1;
445 }
446 else {
447 memcpy(self->write_buf + self->buf_size, s, n);
448 self->buf_size += n;
449 return n;
450 }
451 }
452
453 /* object with write method */
454 result = pickler_call(self, self->write, data);
455 if (result == NULL)
456 return -1;
457
458 Py_DECREF(result);
459 self->buf_size = 0;
460 return n;
461}
462
/* XXX: These read/readline functions ought to be optimized. Buffered I/O
   might help a lot, especially with the new (but much slower) io library.
   On the other hand, the added complexity might not be worth it.
 */
467
468/* Read at least n characters from the input stream and set s to the current
469 reading position. */
470static Py_ssize_t
471unpickler_read(UnpicklerObject *self, char **s, Py_ssize_t n)
472{
473 PyObject *len;
474 PyObject *data;
475
476 len = PyLong_FromSsize_t(n);
477 if (len == NULL)
478 return -1;
479
480 data = unpickler_call(self, self->read, len);
481 if (data == NULL)
482 return -1;
483
484 /* XXX: Should bytearray be supported too? */
485 if (!PyBytes_Check(data)) {
486 PyErr_SetString(PyExc_ValueError,
487 "read() from the underlying stream did not"
488 "return bytes");
Amaury Forgeot d'Arc5f952572008-11-25 21:11:54 +0000489 Py_DECREF(data);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000490 return -1;
491 }
492
Amaury Forgeot d'Arc3e4e72f2008-11-11 20:05:06 +0000493 if (PyBytes_GET_SIZE(data) != n) {
494 PyErr_SetNone(PyExc_EOFError);
Amaury Forgeot d'Arc5f952572008-11-25 21:11:54 +0000495 Py_DECREF(data);
Amaury Forgeot d'Arc3e4e72f2008-11-11 20:05:06 +0000496 return -1;
497 }
498
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000499 Py_XDECREF(self->last_string);
500 self->last_string = data;
501
502 if (!(*s = PyBytes_AS_STRING(data)))
503 return -1;
504
505 return n;
506}
507
508static Py_ssize_t
509unpickler_readline(UnpicklerObject *self, char **s)
510{
511 PyObject *data;
512
513 data = PyObject_CallObject(self->readline, empty_tuple);
514 if (data == NULL)
515 return -1;
516
517 /* XXX: Should bytearray be supported too? */
518 if (!PyBytes_Check(data)) {
519 PyErr_SetString(PyExc_ValueError,
520 "readline() from the underlying stream did not"
521 "return bytes");
522 return -1;
523 }
524
525 Py_XDECREF(self->last_string);
526 self->last_string = data;
527
528 if (!(*s = PyBytes_AS_STRING(data)))
529 return -1;
530
531 return PyBytes_GET_SIZE(data);
532}
533
534/* Generate a GET opcode for an object stored in the memo. The 'key' argument
535 should be the address of the object as returned by PyLong_FromVoidPtr(). */
536static int
537memo_get(PicklerObject *self, PyObject *key)
538{
539 PyObject *value;
540 PyObject *memo_id;
541 long x;
542 char pdata[30];
543 int len;
544
545 value = PyDict_GetItemWithError(self->memo, key);
546 if (value == NULL) {
547 if (!PyErr_Occurred())
548 PyErr_SetObject(PyExc_KeyError, key);
549 return -1;
550 }
551
552 memo_id = PyTuple_GetItem(value, 0);
553 if (memo_id == NULL)
554 return -1;
555
556 if (!PyLong_Check(memo_id)) {
557 PyErr_SetString(PicklingError, "memo id must be an integer");
558 return -1;
559 }
560 x = PyLong_AsLong(memo_id);
561 if (x == -1 && PyErr_Occurred())
562 return -1;
563
564 if (!self->bin) {
565 pdata[0] = GET;
566 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
567 len = (int)strlen(pdata);
568 }
569 else {
570 if (x < 256) {
571 pdata[0] = BINGET;
572 pdata[1] = (unsigned char)(x & 0xff);
573 len = 2;
574 }
575 else if (x <= 0xffffffffL) {
576 pdata[0] = LONG_BINGET;
577 pdata[1] = (unsigned char)(x & 0xff);
578 pdata[2] = (unsigned char)((x >> 8) & 0xff);
579 pdata[3] = (unsigned char)((x >> 16) & 0xff);
580 pdata[4] = (unsigned char)((x >> 24) & 0xff);
581 len = 5;
582 }
583 else { /* unlikely */
584 PyErr_SetString(PicklingError,
585 "memo id too large for LONG_BINGET");
586 return -1;
587 }
588 }
589
590 if (pickler_write(self, pdata, len) < 0)
591 return -1;
592
593 return 0;
594}
595
596/* Store an object in the memo, assign it a new unique ID based on the number
597 of objects currently stored in the memo and generate a PUT opcode. */
598static int
599memo_put(PicklerObject *self, PyObject *obj)
600{
601 PyObject *key = NULL;
602 PyObject *memo_id = NULL;
603 PyObject *tuple = NULL;
604 long x;
605 char pdata[30];
606 int len;
607 int status = 0;
608
609 if (self->fast)
610 return 0;
611
612 key = PyLong_FromVoidPtr(obj);
613 if (key == NULL)
614 goto error;
615 if ((x = PyDict_Size(self->memo)) < 0)
616 goto error;
617 memo_id = PyLong_FromLong(x);
618 if (memo_id == NULL)
619 goto error;
620 tuple = PyTuple_New(2);
621 if (tuple == NULL)
622 goto error;
623
624 Py_INCREF(memo_id);
625 PyTuple_SET_ITEM(tuple, 0, memo_id);
626 Py_INCREF(obj);
627 PyTuple_SET_ITEM(tuple, 1, obj);
628 if (PyDict_SetItem(self->memo, key, tuple) < 0)
629 goto error;
630
631 if (!self->bin) {
632 pdata[0] = PUT;
633 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
634 len = strlen(pdata);
635 }
636 else {
637 if (x < 256) {
638 pdata[0] = BINPUT;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +0000639 pdata[1] = (unsigned char)x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000640 len = 2;
641 }
642 else if (x <= 0xffffffffL) {
643 pdata[0] = LONG_BINPUT;
644 pdata[1] = (unsigned char)(x & 0xff);
645 pdata[2] = (unsigned char)((x >> 8) & 0xff);
646 pdata[3] = (unsigned char)((x >> 16) & 0xff);
647 pdata[4] = (unsigned char)((x >> 24) & 0xff);
648 len = 5;
649 }
650 else { /* unlikely */
651 PyErr_SetString(PicklingError,
652 "memo id too large for LONG_BINPUT");
653 return -1;
654 }
655 }
656
657 if (pickler_write(self, pdata, len) < 0)
658 goto error;
659
660 if (0) {
661 error:
662 status = -1;
663 }
664
665 Py_XDECREF(key);
666 Py_XDECREF(memo_id);
667 Py_XDECREF(tuple);
668
669 return status;
670}
671
672static PyObject *
673whichmodule(PyObject *global, PyObject *global_name)
674{
675 Py_ssize_t i, j;
676 static PyObject *module_str = NULL;
677 static PyObject *main_str = NULL;
678 PyObject *module_name;
679 PyObject *modules_dict;
680 PyObject *module;
681 PyObject *obj;
682
683 if (module_str == NULL) {
684 module_str = PyUnicode_InternFromString("__module__");
685 if (module_str == NULL)
686 return NULL;
687 main_str = PyUnicode_InternFromString("__main__");
688 if (main_str == NULL)
689 return NULL;
690 }
691
692 module_name = PyObject_GetAttr(global, module_str);
693
694 /* In some rare cases (e.g., random.getrandbits), __module__ can be
695 None. If it is so, then search sys.modules for the module of
696 global. */
697 if (module_name == Py_None) {
698 Py_DECREF(module_name);
699 goto search;
700 }
701
702 if (module_name) {
703 return module_name;
704 }
705 if (PyErr_ExceptionMatches(PyExc_AttributeError))
706 PyErr_Clear();
707 else
708 return NULL;
709
710 search:
711 modules_dict = PySys_GetObject("modules");
712 if (modules_dict == NULL)
713 return NULL;
714
715 i = 0;
716 module_name = NULL;
717 while ((j = PyDict_Next(modules_dict, &i, &module_name, &module))) {
Mark Dickinson211c6252009-02-01 10:28:51 +0000718 if (PyObject_RichCompareBool(module_name, main_str, Py_EQ) == 1)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000719 continue;
720
721 obj = PyObject_GetAttr(module, global_name);
722 if (obj == NULL) {
723 if (PyErr_ExceptionMatches(PyExc_AttributeError))
724 PyErr_Clear();
725 else
726 return NULL;
727 continue;
728 }
729
730 if (obj != global) {
731 Py_DECREF(obj);
732 continue;
733 }
734
735 Py_DECREF(obj);
736 break;
737 }
738
739 /* If no module is found, use __main__. */
740 if (!j) {
741 module_name = main_str;
742 }
743
744 Py_INCREF(module_name);
745 return module_name;
746}
747
748/* fast_save_enter() and fast_save_leave() are guards against recursive
749 objects when Pickler is used with the "fast mode" (i.e., with object
750 memoization disabled). If the nesting of a list or dict object exceed
751 FAST_NESTING_LIMIT, these guards will start keeping an internal
752 reference to the seen list or dict objects and check whether these objects
753 are recursive. These are not strictly necessary, since save() has a
754 hard-coded recursion limit, but they give a nicer error message than the
755 typical RuntimeError. */
756static int
757fast_save_enter(PicklerObject *self, PyObject *obj)
758{
759 /* if fast_nesting < 0, we're doing an error exit. */
760 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
761 PyObject *key = NULL;
762 if (self->fast_memo == NULL) {
763 self->fast_memo = PyDict_New();
764 if (self->fast_memo == NULL) {
765 self->fast_nesting = -1;
766 return 0;
767 }
768 }
769 key = PyLong_FromVoidPtr(obj);
770 if (key == NULL)
771 return 0;
772 if (PyDict_GetItem(self->fast_memo, key)) {
773 Py_DECREF(key);
774 PyErr_Format(PyExc_ValueError,
775 "fast mode: can't pickle cyclic objects "
776 "including object type %.200s at %p",
777 obj->ob_type->tp_name, obj);
778 self->fast_nesting = -1;
779 return 0;
780 }
781 if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
782 Py_DECREF(key);
783 self->fast_nesting = -1;
784 return 0;
785 }
786 Py_DECREF(key);
787 }
788 return 1;
789}
790
791static int
792fast_save_leave(PicklerObject *self, PyObject *obj)
793{
794 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
795 PyObject *key = PyLong_FromVoidPtr(obj);
796 if (key == NULL)
797 return 0;
798 if (PyDict_DelItem(self->fast_memo, key) < 0) {
799 Py_DECREF(key);
800 return 0;
801 }
802 Py_DECREF(key);
803 }
804 return 1;
805}
806
807static int
808save_none(PicklerObject *self, PyObject *obj)
809{
810 const char none_op = NONE;
811 if (pickler_write(self, &none_op, 1) < 0)
812 return -1;
813
814 return 0;
815}
816
817static int
818save_bool(PicklerObject *self, PyObject *obj)
819{
820 static const char *buf[2] = { FALSE, TRUE };
821 const char len[2] = {sizeof(FALSE) - 1, sizeof(TRUE) - 1};
822 int p = (obj == Py_True);
823
824 if (self->proto >= 2) {
825 const char bool_op = p ? NEWTRUE : NEWFALSE;
826 if (pickler_write(self, &bool_op, 1) < 0)
827 return -1;
828 }
829 else if (pickler_write(self, buf[p], len[p]) < 0)
830 return -1;
831
832 return 0;
833}
834
835static int
836save_int(PicklerObject *self, long x)
837{
838 char pdata[32];
839 int len = 0;
840
841 if (!self->bin
842#if SIZEOF_LONG > 4
843 || x > 0x7fffffffL || x < -0x80000000L
844#endif
845 ) {
846 /* Text-mode pickle, or long too big to fit in the 4-byte
847 * signed BININT format: store as a string.
848 */
Mark Dickinson8dd05142009-01-20 20:43:58 +0000849 pdata[0] = LONG; /* use LONG for consistency with pickle.py */
850 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ldL\n", x);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000851 if (pickler_write(self, pdata, strlen(pdata)) < 0)
852 return -1;
853 }
854 else {
855 /* Binary pickle and x fits in a signed 4-byte int. */
856 pdata[1] = (unsigned char)(x & 0xff);
857 pdata[2] = (unsigned char)((x >> 8) & 0xff);
858 pdata[3] = (unsigned char)((x >> 16) & 0xff);
859 pdata[4] = (unsigned char)((x >> 24) & 0xff);
860
861 if ((pdata[4] == 0) && (pdata[3] == 0)) {
862 if (pdata[2] == 0) {
863 pdata[0] = BININT1;
864 len = 2;
865 }
866 else {
867 pdata[0] = BININT2;
868 len = 3;
869 }
870 }
871 else {
872 pdata[0] = BININT;
873 len = 5;
874 }
875
876 if (pickler_write(self, pdata, len) < 0)
877 return -1;
878 }
879
880 return 0;
881}
882
/* Pickle a Python int of arbitrary size.

   Values that fit in a C long are delegated to save_int().  Larger values
   are written as LONG1/LONG4 followed by a little-endian two's-complement
   byte string when proto >= 2, and as LONG followed by the decimal repr and
   "L\n" for older protocols.

   Returns 0 on success and -1 with an exception set on failure. */
static int
save_long(PicklerObject *self, PyObject *obj)
{
    PyObject *repr = NULL;
    Py_ssize_t size;
    long val = PyLong_AsLong(obj);
    int status = 0;

    const char long_op = LONG;

    if (val == -1 && PyErr_Occurred()) {
        /* out of range for int pickling */
        PyErr_Clear();
    }
    else
        return save_int(self, val);

    if (self->proto >= 2) {
        /* Linear-time pickling. */
        size_t nbits;
        size_t nbytes;
        unsigned char *pdata;
        char header[5];
        int i;
        int sign = _PyLong_Sign(obj);

        if (sign == 0) {
            header[0] = LONG1;
            header[1] = 0;      /* It's 0 -- an empty bytestring. */
            if (pickler_write(self, header, 2) < 0)
                goto error;
            return 0;
        }
        nbits = _PyLong_NumBits(obj);
        if (nbits == (size_t)-1 && PyErr_Occurred())
            goto error;
        /* How many bytes do we need?  There are nbits >> 3 full
         * bytes of data, and nbits & 7 leftover bits.  If there
         * are any leftover bits, then we clearly need another
         * byte.  What's not so obvious is that we *probably*
         * need another byte even if there aren't any leftovers:
         * the most-significant bit of the most-significant byte
         * acts like a sign bit, and it's usually got a sense
         * opposite of the one we need.  The exception is longs
         * of the form -(2**(8*j-1)) for j > 0.  Such a long is
         * its own 256's-complement, so has the right sign bit
         * even without the extra byte.  That's a pain to check
         * for in advance, though, so we always grab an extra
         * byte at the start, and cut it back later if possible.
         */
        nbytes = (nbits >> 3) + 1;
        if (nbytes > INT_MAX) {
            PyErr_SetString(PyExc_OverflowError,
                            "long too large to pickle");
            goto error;
        }
        /* repr doubles as the scratch buffer for the raw bytes. */
        repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
        if (repr == NULL)
            goto error;
        pdata = (unsigned char *)PyBytes_AS_STRING(repr);
        i = _PyLong_AsByteArray((PyLongObject *)obj,
                                pdata, nbytes,
                                1 /* little endian */ , 1 /* signed */ );
        if (i < 0)
            goto error;
        /* If the long is negative, this may be a byte more than
         * needed.  This is so iff the MSB is all redundant sign
         * bits.
         */
        if (sign < 0 &&
            nbytes > 1 &&
            pdata[nbytes - 1] == 0xff &&
            (pdata[nbytes - 2] & 0x80) != 0) {
            nbytes--;
        }

        if (nbytes < 256) {
            header[0] = LONG1;
            header[1] = (unsigned char)nbytes;
            size = 2;
        }
        else {
            header[0] = LONG4;
            /* size is used as a scratch copy of nbytes while the 4-byte
               little-endian length is emitted, then reset to the header
               length. */
            size = (int)nbytes;
            for (i = 1; i < 5; i++) {
                header[i] = (unsigned char)(size & 0xff);
                size >>= 8;
            }
            size = 5;
        }
        if (pickler_write(self, header, size) < 0 ||
            pickler_write(self, (char *)pdata, (int)nbytes) < 0)
            goto error;
    }
    else {
        char *string;

        /* proto < 2: write the repr and newline.  This is quadratic-time (in
           the number of digits), in both directions.  We add a trailing 'L'
           to the repr, for compatibility with Python 2.x. */

        repr = PyObject_Repr(obj);
        if (repr == NULL)
            goto error;

        string = _PyUnicode_AsStringAndSize(repr, &size);
        if (string == NULL)
            goto error;

        if (pickler_write(self, &long_op, 1) < 0 ||
            pickler_write(self, string, size) < 0 ||
            pickler_write(self, "L\n", 2) < 0)
            goto error;
    }

    /* The error label sits inside a never-taken branch so that the success
       path falls through to the shared cleanup with status still 0. */
    if (0) {
  error:
      status = -1;
    }
    Py_XDECREF(repr);

    return status;
}
1006
1007static int
1008save_float(PicklerObject *self, PyObject *obj)
1009{
1010 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
1011
1012 if (self->bin) {
1013 char pdata[9];
1014 pdata[0] = BINFLOAT;
1015 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
1016 return -1;
1017 if (pickler_write(self, pdata, 9) < 0)
1018 return -1;
1019 }
1020 else {
1021 char pdata[250];
1022 pdata[0] = FLOAT;
1023 PyOS_ascii_formatd(pdata + 1, sizeof(pdata) - 2, "%.17g", x);
1024 /* Extend the formatted string with a newline character */
1025 strcat(pdata, "\n");
1026
1027 if (pickler_write(self, pdata, strlen(pdata)) < 0)
1028 return -1;
1029 }
1030
1031 return 0;
1032}
1033
1034static int
1035save_bytes(PicklerObject *self, PyObject *obj)
1036{
1037 if (self->proto < 3) {
1038 /* Older pickle protocols do not have an opcode for pickling bytes
1039 objects. Therefore, we need to fake the copy protocol (i.e.,
1040 the __reduce__ method) to permit bytes object unpickling. */
1041 PyObject *reduce_value = NULL;
1042 PyObject *bytelist = NULL;
1043 int status;
1044
1045 bytelist = PySequence_List(obj);
1046 if (bytelist == NULL)
1047 return -1;
1048
1049 reduce_value = Py_BuildValue("(O(O))", (PyObject *)&PyBytes_Type,
1050 bytelist);
1051 if (reduce_value == NULL) {
1052 Py_DECREF(bytelist);
1053 return -1;
1054 }
1055
1056 /* save_reduce() will memoize the object automatically. */
1057 status = save_reduce(self, reduce_value, obj);
1058 Py_DECREF(reduce_value);
1059 Py_DECREF(bytelist);
1060 return status;
1061 }
1062 else {
1063 Py_ssize_t size;
1064 char header[5];
1065 int len;
1066
1067 size = PyBytes_Size(obj);
1068 if (size < 0)
1069 return -1;
1070
1071 if (size < 256) {
1072 header[0] = SHORT_BINBYTES;
1073 header[1] = (unsigned char)size;
1074 len = 2;
1075 }
1076 else if (size <= 0xffffffffL) {
1077 header[0] = BINBYTES;
1078 header[1] = (unsigned char)(size & 0xff);
1079 header[2] = (unsigned char)((size >> 8) & 0xff);
1080 header[3] = (unsigned char)((size >> 16) & 0xff);
1081 header[4] = (unsigned char)((size >> 24) & 0xff);
1082 len = 5;
1083 }
1084 else {
1085 return -1; /* string too large */
1086 }
1087
1088 if (pickler_write(self, header, len) < 0)
1089 return -1;
1090
1091 if (pickler_write(self, PyBytes_AS_STRING(obj), size) < 0)
1092 return -1;
1093
1094 if (memo_put(self, obj) < 0)
1095 return -1;
1096
1097 return 0;
1098 }
1099}
1100
/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
   backslash and newline characters to \uXXXX escapes.

   s points to size Py_UNICODE code units.  Returns a new bytes object, or
   NULL with an exception set.  The text is first built in a bytearray sized
   for the worst case and then copied into a bytes object of the exact
   length. */
static PyObject *
raw_unicode_escape(const Py_UNICODE *s, Py_ssize_t size)
{
    PyObject *repr, *result;
    char *p;    /* write cursor into the scratch bytearray */
    char *q;    /* start of the scratch bytearray, used to compute the length */

    static const char *hexdigits = "0123456789abcdef";

    /* Worst-case number of output bytes per input code unit:
       "\Uxxxxxxxx" (10) on wide builds, "\uxxxx" (6) on narrow builds. */
#ifdef Py_UNICODE_WIDE
    const Py_ssize_t expandsize = 10;
#else
    const Py_ssize_t expandsize = 6;
#endif

    /* Refuse sizes for which expandsize * size would overflow. */
    if (size > PY_SSIZE_T_MAX / expandsize)
        return PyErr_NoMemory();

    repr = PyByteArray_FromStringAndSize(NULL, expandsize * size);
    if (repr == NULL)
        return NULL;
    if (size == 0)
        goto done;

    p = q = PyByteArray_AS_STRING(repr);
    while (size-- > 0) {
        Py_UNICODE ch = *s++;
#ifdef Py_UNICODE_WIDE
        /* Map 32-bit characters to '\Uxxxxxxxx' */
        if (ch >= 0x10000) {
            *p++ = '\\';
            *p++ = 'U';
            *p++ = hexdigits[(ch >> 28) & 0xf];
            *p++ = hexdigits[(ch >> 24) & 0xf];
            *p++ = hexdigits[(ch >> 20) & 0xf];
            *p++ = hexdigits[(ch >> 16) & 0xf];
            *p++ = hexdigits[(ch >> 12) & 0xf];
            *p++ = hexdigits[(ch >> 8) & 0xf];
            *p++ = hexdigits[(ch >> 4) & 0xf];
            *p++ = hexdigits[ch & 15];
        }
        else
        /* On wide builds this 'else' binds to the "Map 16-bit characters"
           'if' that follows the #endif. */
#else
        /* Map UTF-16 surrogate pairs to '\U00xxxxxx' */
        if (ch >= 0xD800 && ch < 0xDC00) {
            Py_UNICODE ch2;
            Py_UCS4 ucs;

            /* Tentatively consume the next code unit; this is undone below
               if it turns out not to be a low surrogate. */
            ch2 = *s++;
            size--;
            if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
                ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
                *p++ = '\\';
                *p++ = 'U';
                *p++ = hexdigits[(ucs >> 28) & 0xf];
                *p++ = hexdigits[(ucs >> 24) & 0xf];
                *p++ = hexdigits[(ucs >> 20) & 0xf];
                *p++ = hexdigits[(ucs >> 16) & 0xf];
                *p++ = hexdigits[(ucs >> 12) & 0xf];
                *p++ = hexdigits[(ucs >> 8) & 0xf];
                *p++ = hexdigits[(ucs >> 4) & 0xf];
                *p++ = hexdigits[ucs & 0xf];
                continue;
            }
            /* Fall through: isolated surrogates are copied as-is */
            s--;
            size++;
        }
#endif
        /* Map 16-bit characters to '\uxxxx' */
        if (ch >= 256 || ch == '\\' || ch == '\n') {
            *p++ = '\\';
            *p++ = 'u';
            *p++ = hexdigits[(ch >> 12) & 0xf];
            *p++ = hexdigits[(ch >> 8) & 0xf];
            *p++ = hexdigits[(ch >> 4) & 0xf];
            *p++ = hexdigits[ch & 15];
        }
        /* Copy everything else as-is */
        else
            *p++ = (char) ch;
    }
    /* From here on size holds the number of bytes actually produced. */
    size = p - q;

  done:
    result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(repr), size);
    Py_DECREF(repr);
    return result;
}
1192
1193static int
1194save_unicode(PicklerObject *self, PyObject *obj)
1195{
1196 Py_ssize_t size;
1197 PyObject *encoded = NULL;
1198
1199 if (self->bin) {
1200 char pdata[5];
1201
1202 encoded = PyUnicode_AsUTF8String(obj);
1203 if (encoded == NULL)
1204 goto error;
1205
1206 size = PyBytes_GET_SIZE(encoded);
1207 if (size < 0 || size > 0xffffffffL)
1208 goto error; /* string too large */
1209
1210 pdata[0] = BINUNICODE;
1211 pdata[1] = (unsigned char)(size & 0xff);
1212 pdata[2] = (unsigned char)((size >> 8) & 0xff);
1213 pdata[3] = (unsigned char)((size >> 16) & 0xff);
1214 pdata[4] = (unsigned char)((size >> 24) & 0xff);
1215
1216 if (pickler_write(self, pdata, 5) < 0)
1217 goto error;
1218
1219 if (pickler_write(self, PyBytes_AS_STRING(encoded), size) < 0)
1220 goto error;
1221 }
1222 else {
1223 const char unicode_op = UNICODE;
1224
1225 encoded = raw_unicode_escape(PyUnicode_AS_UNICODE(obj),
1226 PyUnicode_GET_SIZE(obj));
1227 if (encoded == NULL)
1228 goto error;
1229
1230 if (pickler_write(self, &unicode_op, 1) < 0)
1231 goto error;
1232
1233 size = PyBytes_GET_SIZE(encoded);
1234 if (pickler_write(self, PyBytes_AS_STRING(encoded), size) < 0)
1235 goto error;
1236
1237 if (pickler_write(self, "\n", 1) < 0)
1238 goto error;
1239 }
1240 if (memo_put(self, obj) < 0)
1241 goto error;
1242
1243 Py_DECREF(encoded);
1244 return 0;
1245
1246 error:
1247 Py_XDECREF(encoded);
1248 return -1;
1249}
1250
1251/* A helper for save_tuple. Push the len elements in tuple t on the stack. */
1252static int
1253store_tuple_elements(PicklerObject *self, PyObject *t, int len)
1254{
1255 int i;
1256
1257 assert(PyTuple_Size(t) == len);
1258
1259 for (i = 0; i < len; i++) {
1260 PyObject *element = PyTuple_GET_ITEM(t, i);
1261
1262 if (element == NULL)
1263 return -1;
1264 if (save(self, element, 0) < 0)
1265 return -1;
1266 }
1267
1268 return 0;
1269}
1270
1271/* Tuples are ubiquitous in the pickle protocols, so many techniques are
1272 * used across protocols to minimize the space needed to pickle them.
1273 * Tuples are also the only builtin immutable type that can be recursive
1274 * (a tuple can be reached from itself), and that requires some subtle
1275 * magic so that it works in all cases. IOW, this is a long routine.
1276 */
1277static int
1278save_tuple(PicklerObject *self, PyObject *obj)
1279{
1280 PyObject *memo_key = NULL;
1281 int len, i;
1282 int status = 0;
1283
1284 const char mark_op = MARK;
1285 const char tuple_op = TUPLE;
1286 const char pop_op = POP;
1287 const char pop_mark_op = POP_MARK;
1288 const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
1289
1290 if ((len = PyTuple_Size(obj)) < 0)
1291 return -1;
1292
1293 if (len == 0) {
1294 char pdata[2];
1295
1296 if (self->proto) {
1297 pdata[0] = EMPTY_TUPLE;
1298 len = 1;
1299 }
1300 else {
1301 pdata[0] = MARK;
1302 pdata[1] = TUPLE;
1303 len = 2;
1304 }
1305 if (pickler_write(self, pdata, len) < 0)
1306 return -1;
1307 return 0;
1308 }
1309
1310 /* id(tuple) isn't in the memo now. If it shows up there after
1311 * saving the tuple elements, the tuple must be recursive, in
1312 * which case we'll pop everything we put on the stack, and fetch
1313 * its value from the memo.
1314 */
1315 memo_key = PyLong_FromVoidPtr(obj);
1316 if (memo_key == NULL)
1317 return -1;
1318
1319 if (len <= 3 && self->proto >= 2) {
1320 /* Use TUPLE{1,2,3} opcodes. */
1321 if (store_tuple_elements(self, obj, len) < 0)
1322 goto error;
1323
1324 if (PyDict_GetItem(self->memo, memo_key)) {
1325 /* pop the len elements */
1326 for (i = 0; i < len; i++)
1327 if (pickler_write(self, &pop_op, 1) < 0)
1328 goto error;
1329 /* fetch from memo */
1330 if (memo_get(self, memo_key) < 0)
1331 goto error;
1332
1333 Py_DECREF(memo_key);
1334 return 0;
1335 }
1336 else { /* Not recursive. */
1337 if (pickler_write(self, len2opcode + len, 1) < 0)
1338 goto error;
1339 }
1340 goto memoize;
1341 }
1342
1343 /* proto < 2 and len > 0, or proto >= 2 and len > 3.
1344 * Generate MARK e1 e2 ... TUPLE
1345 */
1346 if (pickler_write(self, &mark_op, 1) < 0)
1347 goto error;
1348
1349 if (store_tuple_elements(self, obj, len) < 0)
1350 goto error;
1351
1352 if (PyDict_GetItem(self->memo, memo_key)) {
1353 /* pop the stack stuff we pushed */
1354 if (self->bin) {
1355 if (pickler_write(self, &pop_mark_op, 1) < 0)
1356 goto error;
1357 }
1358 else {
1359 /* Note that we pop one more than len, to remove
1360 * the MARK too.
1361 */
1362 for (i = 0; i <= len; i++)
1363 if (pickler_write(self, &pop_op, 1) < 0)
1364 goto error;
1365 }
1366 /* fetch from memo */
1367 if (memo_get(self, memo_key) < 0)
1368 goto error;
1369
1370 Py_DECREF(memo_key);
1371 return 0;
1372 }
1373 else { /* Not recursive. */
1374 if (pickler_write(self, &tuple_op, 1) < 0)
1375 goto error;
1376 }
1377
1378 memoize:
1379 if (memo_put(self, obj) < 0)
1380 goto error;
1381
1382 if (0) {
1383 error:
1384 status = -1;
1385 }
1386
1387 Py_DECREF(memo_key);
1388 return status;
1389}
1390
1391/* iter is an iterator giving items, and we batch up chunks of
1392 * MARK item item ... item APPENDS
1393 * opcode sequences. Calling code should have arranged to first create an
1394 * empty list, or list-like object, for the APPENDS to operate on.
1395 * Returns 0 on success, <0 on error.
1396 */
1397static int
1398batch_list(PicklerObject *self, PyObject *iter)
1399{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001400 PyObject *obj = NULL;
1401 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001402 int i, n;
1403
1404 const char mark_op = MARK;
1405 const char append_op = APPEND;
1406 const char appends_op = APPENDS;
1407
1408 assert(iter != NULL);
1409
1410 /* XXX: I think this function could be made faster by avoiding the
1411 iterator interface and fetching objects directly from list using
1412 PyList_GET_ITEM.
1413 */
1414
1415 if (self->proto == 0) {
1416 /* APPENDS isn't available; do one at a time. */
1417 for (;;) {
1418 obj = PyIter_Next(iter);
1419 if (obj == NULL) {
1420 if (PyErr_Occurred())
1421 return -1;
1422 break;
1423 }
1424 i = save(self, obj, 0);
1425 Py_DECREF(obj);
1426 if (i < 0)
1427 return -1;
1428 if (pickler_write(self, &append_op, 1) < 0)
1429 return -1;
1430 }
1431 return 0;
1432 }
1433
1434 /* proto > 0: write in batches of BATCHSIZE. */
1435 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001436 /* Get first item */
1437 firstitem = PyIter_Next(iter);
1438 if (firstitem == NULL) {
1439 if (PyErr_Occurred())
1440 goto error;
1441
1442 /* nothing more to add */
1443 break;
1444 }
1445
1446 /* Try to get a second item */
1447 obj = PyIter_Next(iter);
1448 if (obj == NULL) {
1449 if (PyErr_Occurred())
1450 goto error;
1451
1452 /* Only one item to write */
1453 if (save(self, firstitem, 0) < 0)
1454 goto error;
1455 if (pickler_write(self, &append_op, 1) < 0)
1456 goto error;
1457 Py_CLEAR(firstitem);
1458 break;
1459 }
1460
1461 /* More than one item to write */
1462
1463 /* Pump out MARK, items, APPENDS. */
1464 if (pickler_write(self, &mark_op, 1) < 0)
1465 goto error;
1466
1467 if (save(self, firstitem, 0) < 0)
1468 goto error;
1469 Py_CLEAR(firstitem);
1470 n = 1;
1471
1472 /* Fetch and save up to BATCHSIZE items */
1473 while (obj) {
1474 if (save(self, obj, 0) < 0)
1475 goto error;
1476 Py_CLEAR(obj);
1477 n += 1;
1478
1479 if (n == BATCHSIZE)
1480 break;
1481
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001482 obj = PyIter_Next(iter);
1483 if (obj == NULL) {
1484 if (PyErr_Occurred())
1485 goto error;
1486 break;
1487 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001488 }
1489
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001490 if (pickler_write(self, &appends_op, 1) < 0)
1491 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001492
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001493 } while (n == BATCHSIZE);
1494 return 0;
1495
1496 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001497 Py_XDECREF(firstitem);
1498 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001499 return -1;
1500}
1501
1502static int
1503save_list(PicklerObject *self, PyObject *obj)
1504{
1505 PyObject *iter;
1506 char header[3];
1507 int len;
1508 int status = 0;
1509
1510 if (self->fast && !fast_save_enter(self, obj))
1511 goto error;
1512
1513 /* Create an empty list. */
1514 if (self->bin) {
1515 header[0] = EMPTY_LIST;
1516 len = 1;
1517 }
1518 else {
1519 header[0] = MARK;
1520 header[1] = LIST;
1521 len = 2;
1522 }
1523
1524 if (pickler_write(self, header, len) < 0)
1525 goto error;
1526
1527 /* Get list length, and bow out early if empty. */
1528 if ((len = PyList_Size(obj)) < 0)
1529 goto error;
1530
1531 if (memo_put(self, obj) < 0)
1532 goto error;
1533
1534 if (len != 0) {
1535 /* Save the list elements. */
1536 iter = PyObject_GetIter(obj);
1537 if (iter == NULL)
1538 goto error;
1539 status = batch_list(self, iter);
1540 Py_DECREF(iter);
1541 }
1542
1543 if (0) {
1544 error:
1545 status = -1;
1546 }
1547
1548 if (self->fast && !fast_save_leave(self, obj))
1549 status = -1;
1550
1551 return status;
1552}
1553
1554/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
1555 * MARK key value ... key value SETITEMS
1556 * opcode sequences. Calling code should have arranged to first create an
1557 * empty dict, or dict-like object, for the SETITEMS to operate on.
1558 * Returns 0 on success, <0 on error.
1559 *
1560 * This is very much like batch_list(). The difference between saving
1561 * elements directly, and picking apart two-tuples, is so long-winded at
1562 * the C level, though, that attempts to combine these routines were too
1563 * ugly to bear.
1564 */
1565static int
1566batch_dict(PicklerObject *self, PyObject *iter)
1567{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001568 PyObject *obj = NULL;
1569 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001570 int i, n;
1571
1572 const char mark_op = MARK;
1573 const char setitem_op = SETITEM;
1574 const char setitems_op = SETITEMS;
1575
1576 assert(iter != NULL);
1577
1578 if (self->proto == 0) {
1579 /* SETITEMS isn't available; do one at a time. */
1580 for (;;) {
1581 obj = PyIter_Next(iter);
1582 if (obj == NULL) {
1583 if (PyErr_Occurred())
1584 return -1;
1585 break;
1586 }
1587 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
1588 PyErr_SetString(PyExc_TypeError, "dict items "
1589 "iterator must return 2-tuples");
1590 return -1;
1591 }
1592 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
1593 if (i >= 0)
1594 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
1595 Py_DECREF(obj);
1596 if (i < 0)
1597 return -1;
1598 if (pickler_write(self, &setitem_op, 1) < 0)
1599 return -1;
1600 }
1601 return 0;
1602 }
1603
1604 /* proto > 0: write in batches of BATCHSIZE. */
1605 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001606 /* Get first item */
1607 firstitem = PyIter_Next(iter);
1608 if (firstitem == NULL) {
1609 if (PyErr_Occurred())
1610 goto error;
1611
1612 /* nothing more to add */
1613 break;
1614 }
1615 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
1616 PyErr_SetString(PyExc_TypeError, "dict items "
1617 "iterator must return 2-tuples");
1618 goto error;
1619 }
1620
1621 /* Try to get a second item */
1622 obj = PyIter_Next(iter);
1623 if (obj == NULL) {
1624 if (PyErr_Occurred())
1625 goto error;
1626
1627 /* Only one item to write */
1628 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
1629 goto error;
1630 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
1631 goto error;
1632 if (pickler_write(self, &setitem_op, 1) < 0)
1633 goto error;
1634 Py_CLEAR(firstitem);
1635 break;
1636 }
1637
1638 /* More than one item to write */
1639
1640 /* Pump out MARK, items, SETITEMS. */
1641 if (pickler_write(self, &mark_op, 1) < 0)
1642 goto error;
1643
1644 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
1645 goto error;
1646 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
1647 goto error;
1648 Py_CLEAR(firstitem);
1649 n = 1;
1650
1651 /* Fetch and save up to BATCHSIZE items */
1652 while (obj) {
1653 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
1654 PyErr_SetString(PyExc_TypeError, "dict items "
1655 "iterator must return 2-tuples");
1656 goto error;
1657 }
1658 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
1659 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
1660 goto error;
1661 Py_CLEAR(obj);
1662 n += 1;
1663
1664 if (n == BATCHSIZE)
1665 break;
1666
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001667 obj = PyIter_Next(iter);
1668 if (obj == NULL) {
1669 if (PyErr_Occurred())
1670 goto error;
1671 break;
1672 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001673 }
1674
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001675 if (pickler_write(self, &setitems_op, 1) < 0)
1676 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001677
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001678 } while (n == BATCHSIZE);
1679 return 0;
1680
1681 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001682 Py_XDECREF(firstitem);
1683 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001684 return -1;
1685}
1686
1687static int
1688save_dict(PicklerObject *self, PyObject *obj)
1689{
1690 PyObject *items, *iter;
1691 char header[3];
1692 int len;
1693 int status = 0;
1694
1695 if (self->fast && !fast_save_enter(self, obj))
1696 goto error;
1697
1698 /* Create an empty dict. */
1699 if (self->bin) {
1700 header[0] = EMPTY_DICT;
1701 len = 1;
1702 }
1703 else {
1704 header[0] = MARK;
1705 header[1] = DICT;
1706 len = 2;
1707 }
1708
1709 if (pickler_write(self, header, len) < 0)
1710 goto error;
1711
1712 /* Get dict size, and bow out early if empty. */
1713 if ((len = PyDict_Size(obj)) < 0)
1714 goto error;
1715
1716 if (memo_put(self, obj) < 0)
1717 goto error;
1718
1719 if (len != 0) {
1720 /* Save the dict items. */
1721 items = PyObject_CallMethod(obj, "items", "()");
1722 if (items == NULL)
1723 goto error;
1724 iter = PyObject_GetIter(items);
1725 Py_DECREF(items);
1726 if (iter == NULL)
1727 goto error;
1728 status = batch_dict(self, iter);
1729 Py_DECREF(iter);
1730 }
1731
1732 if (0) {
1733 error:
1734 status = -1;
1735 }
1736
1737 if (self->fast && !fast_save_leave(self, obj))
1738 status = -1;
1739
1740 return status;
1741}
1742
1743static int
1744save_global(PicklerObject *self, PyObject *obj, PyObject *name)
1745{
1746 static PyObject *name_str = NULL;
1747 PyObject *global_name = NULL;
1748 PyObject *module_name = NULL;
1749 PyObject *module = NULL;
1750 PyObject *cls;
1751 int status = 0;
1752
1753 const char global_op = GLOBAL;
1754
1755 if (name_str == NULL) {
1756 name_str = PyUnicode_InternFromString("__name__");
1757 if (name_str == NULL)
1758 goto error;
1759 }
1760
1761 if (name) {
1762 global_name = name;
1763 Py_INCREF(global_name);
1764 }
1765 else {
1766 global_name = PyObject_GetAttr(obj, name_str);
1767 if (global_name == NULL)
1768 goto error;
1769 }
1770
1771 module_name = whichmodule(obj, global_name);
1772 if (module_name == NULL)
1773 goto error;
1774
1775 /* XXX: Change to use the import C API directly with level=0 to disallow
1776 relative imports.
1777
1778 XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
1779 builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
1780 custom import functions (IMHO, this would be a nice security
1781 feature). The import C API would need to be extended to support the
1782 extra parameters of __import__ to fix that. */
1783 module = PyImport_Import(module_name);
1784 if (module == NULL) {
1785 PyErr_Format(PicklingError,
1786 "Can't pickle %R: import of module %R failed",
1787 obj, module_name);
1788 goto error;
1789 }
1790 cls = PyObject_GetAttr(module, global_name);
1791 if (cls == NULL) {
1792 PyErr_Format(PicklingError,
1793 "Can't pickle %R: attribute lookup %S.%S failed",
1794 obj, module_name, global_name);
1795 goto error;
1796 }
1797 if (cls != obj) {
1798 Py_DECREF(cls);
1799 PyErr_Format(PicklingError,
1800 "Can't pickle %R: it's not the same object as %S.%S",
1801 obj, module_name, global_name);
1802 goto error;
1803 }
1804 Py_DECREF(cls);
1805
1806 if (self->proto >= 2) {
1807 /* See whether this is in the extension registry, and if
1808 * so generate an EXT opcode.
1809 */
1810 PyObject *code_obj; /* extension code as Python object */
1811 long code; /* extension code as C value */
1812 char pdata[5];
1813 int n;
1814
1815 PyTuple_SET_ITEM(two_tuple, 0, module_name);
1816 PyTuple_SET_ITEM(two_tuple, 1, global_name);
1817 code_obj = PyDict_GetItem(extension_registry, two_tuple);
1818 /* The object is not registered in the extension registry.
1819 This is the most likely code path. */
1820 if (code_obj == NULL)
1821 goto gen_global;
1822
1823 /* XXX: pickle.py doesn't check neither the type, nor the range
1824 of the value returned by the extension_registry. It should for
1825 consistency. */
1826
1827 /* Verify code_obj has the right type and value. */
1828 if (!PyLong_Check(code_obj)) {
1829 PyErr_Format(PicklingError,
1830 "Can't pickle %R: extension code %R isn't an integer",
1831 obj, code_obj);
1832 goto error;
1833 }
1834 code = PyLong_AS_LONG(code_obj);
1835 if (code <= 0 || code > 0x7fffffffL) {
1836 PyErr_Format(PicklingError,
1837 "Can't pickle %R: extension code %ld is out of range",
1838 obj, code);
1839 goto error;
1840 }
1841
1842 /* Generate an EXT opcode. */
1843 if (code <= 0xff) {
1844 pdata[0] = EXT1;
1845 pdata[1] = (unsigned char)code;
1846 n = 2;
1847 }
1848 else if (code <= 0xffff) {
1849 pdata[0] = EXT2;
1850 pdata[1] = (unsigned char)(code & 0xff);
1851 pdata[2] = (unsigned char)((code >> 8) & 0xff);
1852 n = 3;
1853 }
1854 else {
1855 pdata[0] = EXT4;
1856 pdata[1] = (unsigned char)(code & 0xff);
1857 pdata[2] = (unsigned char)((code >> 8) & 0xff);
1858 pdata[3] = (unsigned char)((code >> 16) & 0xff);
1859 pdata[4] = (unsigned char)((code >> 24) & 0xff);
1860 n = 5;
1861 }
1862
1863 if (pickler_write(self, pdata, n) < 0)
1864 goto error;
1865 }
1866 else {
1867 /* Generate a normal global opcode if we are using a pickle
1868 protocol <= 2, or if the object is not registered in the
1869 extension registry. */
1870 PyObject *encoded;
1871 PyObject *(*unicode_encoder)(PyObject *);
1872
1873 gen_global:
1874 if (pickler_write(self, &global_op, 1) < 0)
1875 goto error;
1876
1877 /* Since Python 3.0 now supports non-ASCII identifiers, we encode both
1878 the module name and the global name using UTF-8. We do so only when
1879 we are using the pickle protocol newer than version 3. This is to
1880 ensure compatibility with older Unpickler running on Python 2.x. */
1881 if (self->proto >= 3) {
1882 unicode_encoder = PyUnicode_AsUTF8String;
1883 }
1884 else {
1885 unicode_encoder = PyUnicode_AsASCIIString;
1886 }
1887
1888 /* Save the name of the module. */
1889 encoded = unicode_encoder(module_name);
1890 if (encoded == NULL) {
1891 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
1892 PyErr_Format(PicklingError,
1893 "can't pickle module identifier '%S' using "
1894 "pickle protocol %i", module_name, self->proto);
1895 goto error;
1896 }
1897 if (pickler_write(self, PyBytes_AS_STRING(encoded),
1898 PyBytes_GET_SIZE(encoded)) < 0) {
1899 Py_DECREF(encoded);
1900 goto error;
1901 }
1902 Py_DECREF(encoded);
1903 if(pickler_write(self, "\n", 1) < 0)
1904 goto error;
1905
1906 /* Save the name of the module. */
1907 encoded = unicode_encoder(global_name);
1908 if (encoded == NULL) {
1909 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
1910 PyErr_Format(PicklingError,
1911 "can't pickle global identifier '%S' using "
1912 "pickle protocol %i", global_name, self->proto);
1913 goto error;
1914 }
1915 if (pickler_write(self, PyBytes_AS_STRING(encoded),
1916 PyBytes_GET_SIZE(encoded)) < 0) {
1917 Py_DECREF(encoded);
1918 goto error;
1919 }
1920 Py_DECREF(encoded);
1921 if(pickler_write(self, "\n", 1) < 0)
1922 goto error;
1923
1924 /* Memoize the object. */
1925 if (memo_put(self, obj) < 0)
1926 goto error;
1927 }
1928
1929 if (0) {
1930 error:
1931 status = -1;
1932 }
1933 Py_XDECREF(module_name);
1934 Py_XDECREF(global_name);
1935 Py_XDECREF(module);
1936
1937 return status;
1938}
1939
1940static int
1941save_pers(PicklerObject *self, PyObject *obj, PyObject *func)
1942{
1943 PyObject *pid = NULL;
1944 int status = 0;
1945
1946 const char persid_op = PERSID;
1947 const char binpersid_op = BINPERSID;
1948
1949 Py_INCREF(obj);
1950 pid = pickler_call(self, func, obj);
1951 if (pid == NULL)
1952 return -1;
1953
1954 if (pid != Py_None) {
1955 if (self->bin) {
1956 if (save(self, pid, 1) < 0 ||
1957 pickler_write(self, &binpersid_op, 1) < 0)
1958 goto error;
1959 }
1960 else {
1961 PyObject *pid_str = NULL;
1962 char *pid_ascii_bytes;
1963 Py_ssize_t size;
1964
1965 pid_str = PyObject_Str(pid);
1966 if (pid_str == NULL)
1967 goto error;
1968
1969 /* XXX: Should it check whether the persistent id only contains
1970 ASCII characters? And what if the pid contains embedded
1971 newlines? */
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00001972 pid_ascii_bytes = _PyUnicode_AsStringAndSize(pid_str, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001973 Py_DECREF(pid_str);
1974 if (pid_ascii_bytes == NULL)
1975 goto error;
1976
1977 if (pickler_write(self, &persid_op, 1) < 0 ||
1978 pickler_write(self, pid_ascii_bytes, size) < 0 ||
1979 pickler_write(self, "\n", 1) < 0)
1980 goto error;
1981 }
1982 status = 1;
1983 }
1984
1985 if (0) {
1986 error:
1987 status = -1;
1988 }
1989 Py_XDECREF(pid);
1990
1991 return status;
1992}
1993
1994/* We're saving obj, and args is the 2-thru-5 tuple returned by the
1995 * appropriate __reduce__ method for obj.
1996 */
1997static int
1998save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
1999{
2000 PyObject *callable;
2001 PyObject *argtup;
2002 PyObject *state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002003 PyObject *listitems = Py_None;
2004 PyObject *dictitems = Py_None;
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002005 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002006
2007 int use_newobj = self->proto >= 2;
2008
2009 const char reduce_op = REDUCE;
2010 const char build_op = BUILD;
2011 const char newobj_op = NEWOBJ;
2012
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002013 size = PyTuple_Size(args);
2014 if (size < 2 || size > 5) {
2015 PyErr_SetString(PicklingError, "tuple returned by "
2016 "__reduce__ must contain 2 through 5 elements");
2017 return -1;
2018 }
2019
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002020 if (!PyArg_UnpackTuple(args, "save_reduce", 2, 5,
2021 &callable, &argtup, &state, &listitems, &dictitems))
2022 return -1;
2023
2024 if (!PyCallable_Check(callable)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002025 PyErr_SetString(PicklingError, "first item of the tuple "
2026 "returned by __reduce__ must be callable");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002027 return -1;
2028 }
2029 if (!PyTuple_Check(argtup)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002030 PyErr_SetString(PicklingError, "second item of the tuple "
2031 "returned by __reduce__ must be a tuple");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002032 return -1;
2033 }
2034
2035 if (state == Py_None)
2036 state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002037
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002038 if (listitems == Py_None)
2039 listitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002040 else if (!PyIter_Check(listitems)) {
2041 PyErr_Format(PicklingError, "Fourth element of tuple"
2042 "returned by __reduce__ must be an iterator, not %s",
2043 Py_TYPE(listitems)->tp_name);
2044 return -1;
2045 }
2046
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002047 if (dictitems == Py_None)
2048 dictitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002049 else if (!PyIter_Check(dictitems)) {
2050 PyErr_Format(PicklingError, "Fifth element of tuple"
2051 "returned by __reduce__ must be an iterator, not %s",
2052 Py_TYPE(dictitems)->tp_name);
2053 return -1;
2054 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002055
2056 /* Protocol 2 special case: if callable's name is __newobj__, use
2057 NEWOBJ. */
2058 if (use_newobj) {
2059 static PyObject *newobj_str = NULL;
2060 PyObject *name_str;
2061
2062 if (newobj_str == NULL) {
2063 newobj_str = PyUnicode_InternFromString("__newobj__");
2064 }
2065
2066 name_str = PyObject_GetAttrString(callable, "__name__");
2067 if (name_str == NULL) {
2068 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2069 PyErr_Clear();
2070 else
2071 return -1;
2072 use_newobj = 0;
2073 }
2074 else {
2075 use_newobj = PyUnicode_Check(name_str) &&
2076 PyUnicode_Compare(name_str, newobj_str) == 0;
2077 Py_DECREF(name_str);
2078 }
2079 }
2080 if (use_newobj) {
2081 PyObject *cls;
2082 PyObject *newargtup;
2083 PyObject *obj_class;
2084 int p;
2085
2086 /* Sanity checks. */
2087 if (Py_SIZE(argtup) < 1) {
2088 PyErr_SetString(PicklingError, "__newobj__ arglist is empty");
2089 return -1;
2090 }
2091
2092 cls = PyTuple_GET_ITEM(argtup, 0);
2093 if (!PyObject_HasAttrString(cls, "__new__")) {
2094 PyErr_SetString(PicklingError, "args[0] from "
2095 "__newobj__ args has no __new__");
2096 return -1;
2097 }
2098
2099 if (obj != NULL) {
2100 obj_class = PyObject_GetAttrString(obj, "__class__");
2101 if (obj_class == NULL) {
2102 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2103 PyErr_Clear();
2104 else
2105 return -1;
2106 }
2107 p = obj_class != cls; /* true iff a problem */
2108 Py_DECREF(obj_class);
2109 if (p) {
2110 PyErr_SetString(PicklingError, "args[0] from "
2111 "__newobj__ args has the wrong class");
2112 return -1;
2113 }
2114 }
2115 /* XXX: These calls save() are prone to infinite recursion. Imagine
2116 what happen if the value returned by the __reduce__() method of
2117 some extension type contains another object of the same type. Ouch!
2118
2119 Here is a quick example, that I ran into, to illustrate what I
2120 mean:
2121
2122 >>> import pickle, copyreg
2123 >>> copyreg.dispatch_table.pop(complex)
2124 >>> pickle.dumps(1+2j)
2125 Traceback (most recent call last):
2126 ...
2127 RuntimeError: maximum recursion depth exceeded
2128
2129 Removing the complex class from copyreg.dispatch_table made the
2130 __reduce_ex__() method emit another complex object:
2131
2132 >>> (1+1j).__reduce_ex__(2)
2133 (<function __newobj__ at 0xb7b71c3c>,
2134 (<class 'complex'>, (1+1j)), None, None, None)
2135
2136 Thus when save() was called on newargstup (the 2nd item) recursion
2137 ensued. Of course, the bug was in the complex class which had a
2138 broken __getnewargs__() that emitted another complex object. But,
2139 the point, here, is it is quite easy to end up with a broken reduce
2140 function. */
2141
2142 /* Save the class and its __new__ arguments. */
2143 if (save(self, cls, 0) < 0)
2144 return -1;
2145
2146 newargtup = PyTuple_GetSlice(argtup, 1, Py_SIZE(argtup));
2147 if (newargtup == NULL)
2148 return -1;
2149
2150 p = save(self, newargtup, 0);
2151 Py_DECREF(newargtup);
2152 if (p < 0)
2153 return -1;
2154
2155 /* Add NEWOBJ opcode. */
2156 if (pickler_write(self, &newobj_op, 1) < 0)
2157 return -1;
2158 }
2159 else { /* Not using NEWOBJ. */
2160 if (save(self, callable, 0) < 0 ||
2161 save(self, argtup, 0) < 0 ||
2162 pickler_write(self, &reduce_op, 1) < 0)
2163 return -1;
2164 }
2165
2166 /* obj can be NULL when save_reduce() is used directly. A NULL obj means
2167 the caller do not want to memoize the object. Not particularly useful,
2168 but that is to mimic the behavior save_reduce() in pickle.py when
2169 obj is None. */
2170 if (obj && memo_put(self, obj) < 0)
2171 return -1;
2172
2173 if (listitems && batch_list(self, listitems) < 0)
2174 return -1;
2175
2176 if (dictitems && batch_dict(self, dictitems) < 0)
2177 return -1;
2178
2179 if (state) {
2180 if (save(self, state, 0) < 0 ||
2181 pickler_write(self, &build_op, 1) < 0)
2182 return -1;
2183 }
2184
2185 return 0;
2186}
2187
2188static int
2189save(PicklerObject *self, PyObject *obj, int pers_save)
2190{
2191 PyTypeObject *type;
2192 PyObject *reduce_func = NULL;
2193 PyObject *reduce_value = NULL;
2194 PyObject *memo_key = NULL;
2195 int status = 0;
2196
Alexandre Vassalottidff18342008-07-13 18:48:30 +00002197 if (Py_EnterRecursiveCall(" while pickling an object") < 0)
2198 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002199
2200 /* The extra pers_save argument is necessary to avoid calling save_pers()
2201 on its returned object. */
2202 if (!pers_save && self->pers_func) {
2203 /* save_pers() returns:
2204 -1 to signal an error;
2205 0 if it did nothing successfully;
2206 1 if a persistent id was saved.
2207 */
2208 if ((status = save_pers(self, obj, self->pers_func)) != 0)
2209 goto done;
2210 }
2211
2212 type = Py_TYPE(obj);
2213
2214 /* XXX: The old cPickle had an optimization that used switch-case
2215 statement dispatching on the first letter of the type name. It was
2216 probably not a bad idea after all. If benchmarks shows that particular
2217 optimization had some real benefits, it would be nice to add it
2218 back. */
2219
2220 /* Atom types; these aren't memoized, so don't check the memo. */
2221
2222 if (obj == Py_None) {
2223 status = save_none(self, obj);
2224 goto done;
2225 }
2226 else if (obj == Py_False || obj == Py_True) {
2227 status = save_bool(self, obj);
2228 goto done;
2229 }
2230 else if (type == &PyLong_Type) {
2231 status = save_long(self, obj);
2232 goto done;
2233 }
2234 else if (type == &PyFloat_Type) {
2235 status = save_float(self, obj);
2236 goto done;
2237 }
2238
2239 /* Check the memo to see if it has the object. If so, generate
2240 a GET (or BINGET) opcode, instead of pickling the object
2241 once again. */
2242 memo_key = PyLong_FromVoidPtr(obj);
2243 if (memo_key == NULL)
2244 goto error;
2245 if (PyDict_GetItem(self->memo, memo_key)) {
2246 if (memo_get(self, memo_key) < 0)
2247 goto error;
2248 goto done;
2249 }
2250
2251 if (type == &PyBytes_Type) {
2252 status = save_bytes(self, obj);
2253 goto done;
2254 }
2255 else if (type == &PyUnicode_Type) {
2256 status = save_unicode(self, obj);
2257 goto done;
2258 }
2259 else if (type == &PyDict_Type) {
2260 status = save_dict(self, obj);
2261 goto done;
2262 }
2263 else if (type == &PyList_Type) {
2264 status = save_list(self, obj);
2265 goto done;
2266 }
2267 else if (type == &PyTuple_Type) {
2268 status = save_tuple(self, obj);
2269 goto done;
2270 }
2271 else if (type == &PyType_Type) {
2272 status = save_global(self, obj, NULL);
2273 goto done;
2274 }
2275 else if (type == &PyFunction_Type) {
2276 status = save_global(self, obj, NULL);
2277 if (status < 0 && PyErr_ExceptionMatches(PickleError)) {
2278 /* fall back to reduce */
2279 PyErr_Clear();
2280 }
2281 else {
2282 goto done;
2283 }
2284 }
2285 else if (type == &PyCFunction_Type) {
2286 status = save_global(self, obj, NULL);
2287 goto done;
2288 }
2289 else if (PyType_IsSubtype(type, &PyType_Type)) {
2290 status = save_global(self, obj, NULL);
2291 goto done;
2292 }
2293
2294 /* XXX: This part needs some unit tests. */
2295
2296 /* Get a reduction callable, and call it. This may come from
2297 * copyreg.dispatch_table, the object's __reduce_ex__ method,
2298 * or the object's __reduce__ method.
2299 */
2300 reduce_func = PyDict_GetItem(dispatch_table, (PyObject *)type);
2301 if (reduce_func != NULL) {
2302 /* Here, the reference count of the reduce_func object returned by
2303 PyDict_GetItem needs to be increased to be consistent with the one
2304 returned by PyObject_GetAttr. This is allow us to blindly DECREF
2305 reduce_func at the end of the save() routine.
2306 */
2307 Py_INCREF(reduce_func);
2308 Py_INCREF(obj);
2309 reduce_value = pickler_call(self, reduce_func, obj);
2310 }
2311 else {
2312 static PyObject *reduce_str = NULL;
2313 static PyObject *reduce_ex_str = NULL;
2314
2315 /* Cache the name of the reduce methods. */
2316 if (reduce_str == NULL) {
2317 reduce_str = PyUnicode_InternFromString("__reduce__");
2318 if (reduce_str == NULL)
2319 goto error;
2320 reduce_ex_str = PyUnicode_InternFromString("__reduce_ex__");
2321 if (reduce_ex_str == NULL)
2322 goto error;
2323 }
2324
2325 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
2326 automatically defined as __reduce__. While this is convenient, this
2327 make it impossible to know which method was actually called. Of
2328 course, this is not a big deal. But still, it would be nice to let
2329 the user know which method was called when something go
2330 wrong. Incidentally, this means if __reduce_ex__ is not defined, we
2331 don't actually have to check for a __reduce__ method. */
2332
2333 /* Check for a __reduce_ex__ method. */
2334 reduce_func = PyObject_GetAttr(obj, reduce_ex_str);
2335 if (reduce_func != NULL) {
2336 PyObject *proto;
2337 proto = PyLong_FromLong(self->proto);
2338 if (proto != NULL) {
2339 reduce_value = pickler_call(self, reduce_func, proto);
2340 }
2341 }
2342 else {
2343 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2344 PyErr_Clear();
2345 else
2346 goto error;
2347 /* Check for a __reduce__ method. */
2348 reduce_func = PyObject_GetAttr(obj, reduce_str);
2349 if (reduce_func != NULL) {
2350 reduce_value = PyObject_Call(reduce_func, empty_tuple, NULL);
2351 }
2352 else {
2353 PyErr_Format(PicklingError, "can't pickle '%.200s' object: %R",
2354 type->tp_name, obj);
2355 goto error;
2356 }
2357 }
2358 }
2359
2360 if (reduce_value == NULL)
2361 goto error;
2362
2363 if (PyUnicode_Check(reduce_value)) {
2364 status = save_global(self, obj, reduce_value);
2365 goto done;
2366 }
2367
2368 if (!PyTuple_Check(reduce_value)) {
2369 PyErr_SetString(PicklingError,
2370 "__reduce__ must return a string or tuple");
2371 goto error;
2372 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002373
2374 status = save_reduce(self, reduce_value, obj);
2375
2376 if (0) {
2377 error:
2378 status = -1;
2379 }
2380 done:
Alexandre Vassalottidff18342008-07-13 18:48:30 +00002381 Py_LeaveRecursiveCall();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002382 Py_XDECREF(memo_key);
2383 Py_XDECREF(reduce_func);
2384 Py_XDECREF(reduce_value);
2385
2386 return status;
2387}
2388
2389static int
2390dump(PicklerObject *self, PyObject *obj)
2391{
2392 const char stop_op = STOP;
2393
2394 if (self->proto >= 2) {
2395 char header[2];
2396
2397 header[0] = PROTO;
2398 assert(self->proto >= 0 && self->proto < 256);
2399 header[1] = (unsigned char)self->proto;
2400 if (pickler_write(self, header, 2) < 0)
2401 return -1;
2402 }
2403
2404 if (save(self, obj, 0) < 0 ||
2405 pickler_write(self, &stop_op, 1) < 0 ||
2406 pickler_write(self, NULL, 0) < 0)
2407 return -1;
2408
2409 return 0;
2410}
2411
2412PyDoc_STRVAR(Pickler_clear_memo_doc,
2413"clear_memo() -> None. Clears the pickler's \"memo\"."
2414"\n"
2415"The memo is the data structure that remembers which objects the\n"
2416"pickler has already seen, so that shared or recursive objects are\n"
2417"pickled by reference and not by value. This method is useful when\n"
2418"re-using picklers.");
2419
2420static PyObject *
2421Pickler_clear_memo(PicklerObject *self)
2422{
2423 if (self->memo)
2424 PyDict_Clear(self->memo);
2425
2426 Py_RETURN_NONE;
2427}
2428
2429PyDoc_STRVAR(Pickler_dump_doc,
2430"dump(obj) -> None. Write a pickled representation of obj to the open file.");
2431
2432static PyObject *
2433Pickler_dump(PicklerObject *self, PyObject *args)
2434{
2435 PyObject *obj;
2436
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00002437 /* Check whether the Pickler was initialized correctly (issue3664).
2438 Developers often forget to call __init__() in their subclasses, which
2439 would trigger a segfault without this check. */
2440 if (self->write == NULL) {
2441 PyErr_Format(PicklingError,
2442 "Pickler.__init__() was not called by %s.__init__()",
2443 Py_TYPE(self)->tp_name);
2444 return NULL;
2445 }
2446
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002447 if (!PyArg_ParseTuple(args, "O:dump", &obj))
2448 return NULL;
2449
2450 if (dump(self, obj) < 0)
2451 return NULL;
2452
2453 Py_RETURN_NONE;
2454}
2455
2456static struct PyMethodDef Pickler_methods[] = {
2457 {"dump", (PyCFunction)Pickler_dump, METH_VARARGS,
2458 Pickler_dump_doc},
2459 {"clear_memo", (PyCFunction)Pickler_clear_memo, METH_NOARGS,
2460 Pickler_clear_memo_doc},
2461 {NULL, NULL} /* sentinel */
2462};
2463
2464static void
2465Pickler_dealloc(PicklerObject *self)
2466{
2467 PyObject_GC_UnTrack(self);
2468
2469 Py_XDECREF(self->write);
2470 Py_XDECREF(self->memo);
2471 Py_XDECREF(self->pers_func);
2472 Py_XDECREF(self->arg);
2473 Py_XDECREF(self->fast_memo);
2474
2475 PyMem_Free(self->write_buf);
2476
2477 Py_TYPE(self)->tp_free((PyObject *)self);
2478}
2479
2480static int
2481Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
2482{
2483 Py_VISIT(self->write);
2484 Py_VISIT(self->memo);
2485 Py_VISIT(self->pers_func);
2486 Py_VISIT(self->arg);
2487 Py_VISIT(self->fast_memo);
2488 return 0;
2489}
2490
2491static int
2492Pickler_clear(PicklerObject *self)
2493{
2494 Py_CLEAR(self->write);
2495 Py_CLEAR(self->memo);
2496 Py_CLEAR(self->pers_func);
2497 Py_CLEAR(self->arg);
2498 Py_CLEAR(self->fast_memo);
2499
2500 PyMem_Free(self->write_buf);
2501 self->write_buf = NULL;
2502
2503 return 0;
2504}
2505
2506PyDoc_STRVAR(Pickler_doc,
2507"Pickler(file, protocol=None)"
2508"\n"
2509"This takes a binary file for writing a pickle data stream.\n"
2510"\n"
2511"The optional protocol argument tells the pickler to use the\n"
2512"given protocol; supported protocols are 0, 1, 2, 3. The default\n"
2513"protocol is 3; a backward-incompatible protocol designed for\n"
2514"Python 3.0.\n"
2515"\n"
2516"Specifying a negative protocol version selects the highest\n"
2517"protocol version supported. The higher the protocol used, the\n"
2518"more recent the version of Python needed to read the pickle\n"
2519"produced.\n"
2520"\n"
2521"The file argument must have a write() method that accepts a single\n"
2522"bytes argument. It can thus be a file object opened for binary\n"
2523"writing, a io.BytesIO instance, or any other custom object that\n"
2524"meets this interface.\n");
2525
2526static int
2527Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
2528{
2529 static char *kwlist[] = {"file", "protocol", 0};
2530 PyObject *file;
2531 PyObject *proto_obj = NULL;
2532 long proto = 0;
2533
2534 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:Pickler",
2535 kwlist, &file, &proto_obj))
2536 return -1;
2537
2538 /* In case of multiple __init__() calls, clear previous content. */
2539 if (self->write != NULL)
2540 (void)Pickler_clear(self);
2541
2542 if (proto_obj == NULL || proto_obj == Py_None)
2543 proto = DEFAULT_PROTOCOL;
2544 else
2545 proto = PyLong_AsLong(proto_obj);
2546
2547 if (proto < 0)
2548 proto = HIGHEST_PROTOCOL;
2549 if (proto > HIGHEST_PROTOCOL) {
2550 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
2551 HIGHEST_PROTOCOL);
2552 return -1;
2553 }
2554
2555 self->proto = proto;
2556 self->bin = proto > 0;
2557 self->arg = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002558 self->fast = 0;
2559 self->fast_nesting = 0;
2560 self->fast_memo = NULL;
2561
2562 if (!PyObject_HasAttrString(file, "write")) {
2563 PyErr_SetString(PyExc_TypeError,
2564 "file must have a 'write' attribute");
2565 return -1;
2566 }
2567 self->write = PyObject_GetAttrString(file, "write");
2568 if (self->write == NULL)
2569 return -1;
2570 self->buf_size = 0;
2571 self->write_buf = (char *)PyMem_Malloc(WRITE_BUF_SIZE);
2572 if (self->write_buf == NULL) {
2573 PyErr_NoMemory();
2574 return -1;
2575 }
2576 self->pers_func = NULL;
2577 if (PyObject_HasAttrString((PyObject *)self, "persistent_id")) {
2578 self->pers_func = PyObject_GetAttrString((PyObject *)self,
2579 "persistent_id");
2580 if (self->pers_func == NULL)
2581 return -1;
2582 }
2583 self->memo = PyDict_New();
2584 if (self->memo == NULL)
2585 return -1;
2586
2587 return 0;
2588}
2589
2590static PyObject *
2591Pickler_get_memo(PicklerObject *self)
2592{
2593 if (self->memo == NULL)
2594 PyErr_SetString(PyExc_AttributeError, "memo");
2595 else
2596 Py_INCREF(self->memo);
2597 return self->memo;
2598}
2599
2600static int
2601Pickler_set_memo(PicklerObject *self, PyObject *value)
2602{
2603 PyObject *tmp;
2604
2605 if (value == NULL) {
2606 PyErr_SetString(PyExc_TypeError,
2607 "attribute deletion is not supported");
2608 return -1;
2609 }
2610 if (!PyDict_Check(value)) {
2611 PyErr_SetString(PyExc_TypeError, "memo must be a dictionary");
2612 return -1;
2613 }
2614
2615 tmp = self->memo;
2616 Py_INCREF(value);
2617 self->memo = value;
2618 Py_XDECREF(tmp);
2619
2620 return 0;
2621}
2622
2623static PyObject *
2624Pickler_get_persid(PicklerObject *self)
2625{
2626 if (self->pers_func == NULL)
2627 PyErr_SetString(PyExc_AttributeError, "persistent_id");
2628 else
2629 Py_INCREF(self->pers_func);
2630 return self->pers_func;
2631}
2632
2633static int
2634Pickler_set_persid(PicklerObject *self, PyObject *value)
2635{
2636 PyObject *tmp;
2637
2638 if (value == NULL) {
2639 PyErr_SetString(PyExc_TypeError,
2640 "attribute deletion is not supported");
2641 return -1;
2642 }
2643 if (!PyCallable_Check(value)) {
2644 PyErr_SetString(PyExc_TypeError,
2645 "persistent_id must be a callable taking one argument");
2646 return -1;
2647 }
2648
2649 tmp = self->pers_func;
2650 Py_INCREF(value);
2651 self->pers_func = value;
2652 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
2653
2654 return 0;
2655}
2656
2657static PyMemberDef Pickler_members[] = {
2658 {"bin", T_INT, offsetof(PicklerObject, bin)},
2659 {"fast", T_INT, offsetof(PicklerObject, fast)},
2660 {NULL}
2661};
2662
2663static PyGetSetDef Pickler_getsets[] = {
2664 {"memo", (getter)Pickler_get_memo,
2665 (setter)Pickler_set_memo},
2666 {"persistent_id", (getter)Pickler_get_persid,
2667 (setter)Pickler_set_persid},
2668 {NULL}
2669};
2670
2671static PyTypeObject Pickler_Type = {
2672 PyVarObject_HEAD_INIT(NULL, 0)
2673 "_pickle.Pickler" , /*tp_name*/
2674 sizeof(PicklerObject), /*tp_basicsize*/
2675 0, /*tp_itemsize*/
2676 (destructor)Pickler_dealloc, /*tp_dealloc*/
2677 0, /*tp_print*/
2678 0, /*tp_getattr*/
2679 0, /*tp_setattr*/
2680 0, /*tp_compare*/
2681 0, /*tp_repr*/
2682 0, /*tp_as_number*/
2683 0, /*tp_as_sequence*/
2684 0, /*tp_as_mapping*/
2685 0, /*tp_hash*/
2686 0, /*tp_call*/
2687 0, /*tp_str*/
2688 0, /*tp_getattro*/
2689 0, /*tp_setattro*/
2690 0, /*tp_as_buffer*/
2691 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
2692 Pickler_doc, /*tp_doc*/
2693 (traverseproc)Pickler_traverse, /*tp_traverse*/
2694 (inquiry)Pickler_clear, /*tp_clear*/
2695 0, /*tp_richcompare*/
2696 0, /*tp_weaklistoffset*/
2697 0, /*tp_iter*/
2698 0, /*tp_iternext*/
2699 Pickler_methods, /*tp_methods*/
2700 Pickler_members, /*tp_members*/
2701 Pickler_getsets, /*tp_getset*/
2702 0, /*tp_base*/
2703 0, /*tp_dict*/
2704 0, /*tp_descr_get*/
2705 0, /*tp_descr_set*/
2706 0, /*tp_dictoffset*/
2707 (initproc)Pickler_init, /*tp_init*/
2708 PyType_GenericAlloc, /*tp_alloc*/
2709 PyType_GenericNew, /*tp_new*/
2710 PyObject_GC_Del, /*tp_free*/
2711 0, /*tp_is_gc*/
2712};
2713
2714/* Temporary helper for calling self.find_class().
2715
2716 XXX: It would be nice to able to avoid Python function call overhead, by
2717 using directly the C version of find_class(), when find_class() is not
2718 overridden by a subclass. Although, this could become rather hackish. A
2719 simpler optimization would be to call the C function when self is not a
2720 subclass instance. */
2721static PyObject *
2722find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
2723{
2724 return PyObject_CallMethod((PyObject *)self, "find_class", "OO",
2725 module_name, global_name);
2726}
2727
2728static int
2729marker(UnpicklerObject *self)
2730{
2731 if (self->num_marks < 1) {
2732 PyErr_SetString(UnpicklingError, "could not find MARK");
2733 return -1;
2734 }
2735
2736 return self->marks[--self->num_marks];
2737}
2738
2739static int
2740load_none(UnpicklerObject *self)
2741{
2742 PDATA_APPEND(self->stack, Py_None, -1);
2743 return 0;
2744}
2745
2746static int
2747bad_readline(void)
2748{
2749 PyErr_SetString(UnpicklingError, "pickle data was truncated");
2750 return -1;
2751}
2752
2753static int
2754load_int(UnpicklerObject *self)
2755{
2756 PyObject *value;
2757 char *endptr, *s;
2758 Py_ssize_t len;
2759 long x;
2760
2761 if ((len = unpickler_readline(self, &s)) < 0)
2762 return -1;
2763 if (len < 2)
2764 return bad_readline();
2765
2766 errno = 0;
2767 /* XXX: Should the base argument of strtol() be explicitly set to 10? */
2768 x = strtol(s, &endptr, 0);
2769
2770 if (errno || (*endptr != '\n') || (endptr[1] != '\0')) {
2771 /* Hm, maybe we've got something long. Let's try reading
2772 * it as a Python long object. */
2773 errno = 0;
2774 /* XXX: Same thing about the base here. */
2775 value = PyLong_FromString(s, NULL, 0);
2776 if (value == NULL) {
2777 PyErr_SetString(PyExc_ValueError,
2778 "could not convert string to int");
2779 return -1;
2780 }
2781 }
2782 else {
2783 if (len == 3 && (x == 0 || x == 1)) {
2784 if ((value = PyBool_FromLong(x)) == NULL)
2785 return -1;
2786 }
2787 else {
2788 if ((value = PyLong_FromLong(x)) == NULL)
2789 return -1;
2790 }
2791 }
2792
2793 PDATA_PUSH(self->stack, value, -1);
2794 return 0;
2795}
2796
2797static int
2798load_bool(UnpicklerObject *self, PyObject *boolean)
2799{
2800 assert(boolean == Py_True || boolean == Py_False);
2801 PDATA_APPEND(self->stack, boolean, -1);
2802 return 0;
2803}
2804
/* bytes holds `size` bytes of a little-endian integer; return its value
 * as a C long.  Callers in this file pass a size of 1, 2 or 4.
 *
 * Obscure: when size is 1 or 2 the value is an unsigned little-endian
 * int, but when size is 4 it is a signed (two's complement) one.  This
 * is an historical source of x-platform bugs.
 *
 * The bytes are accumulated in an unsigned long so that shifting a byte
 * into bit 31 is well defined even where long is only 32 bits wide, and
 * the sign extension below needs no knowledge of sizeof(long).
 */
static long
calc_binint(char *bytes, int size)
{
    const unsigned char *s = (const unsigned char *)bytes;
    unsigned long x = 0;
    int i;

    for (i = 0; i < size; i++) {
        x |= (unsigned long)s[i] << (i * 8);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4)
     * is signed: a set bit 31 means the value is x - 2**32.  Computing
     * it as -(0xFFFFFFFF - x) - 1 keeps every intermediate result
     * inside the range of a 32-bit long.
     */
    if (size == 4 && (x & 0x80000000UL) != 0) {
        return -(long)(0xFFFFFFFFUL - x) - 1;
    }

    return (long)x;
}
2831
2832static int
2833load_binintx(UnpicklerObject *self, char *s, int size)
2834{
2835 PyObject *value;
2836 long x;
2837
2838 x = calc_binint(s, size);
2839
2840 if ((value = PyLong_FromLong(x)) == NULL)
2841 return -1;
2842
2843 PDATA_PUSH(self->stack, value, -1);
2844 return 0;
2845}
2846
2847static int
2848load_binint(UnpicklerObject *self)
2849{
2850 char *s;
2851
2852 if (unpickler_read(self, &s, 4) < 0)
2853 return -1;
2854
2855 return load_binintx(self, s, 4);
2856}
2857
2858static int
2859load_binint1(UnpicklerObject *self)
2860{
2861 char *s;
2862
2863 if (unpickler_read(self, &s, 1) < 0)
2864 return -1;
2865
2866 return load_binintx(self, s, 1);
2867}
2868
2869static int
2870load_binint2(UnpicklerObject *self)
2871{
2872 char *s;
2873
2874 if (unpickler_read(self, &s, 2) < 0)
2875 return -1;
2876
2877 return load_binintx(self, s, 2);
2878}
2879
2880static int
2881load_long(UnpicklerObject *self)
2882{
2883 PyObject *value;
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00002884 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002885 Py_ssize_t len;
2886
2887 if ((len = unpickler_readline(self, &s)) < 0)
2888 return -1;
2889 if (len < 2)
2890 return bad_readline();
2891
Mark Dickinson8dd05142009-01-20 20:43:58 +00002892 /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
2893 the 'L' before calling PyLong_FromString. In order to maintain
2894 compatibility with Python 3.0.0, we don't actually *require*
2895 the 'L' to be present. */
2896 if (s[len-2] == 'L') {
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00002897 s[len-2] = '\0';
Mark Dickinson8dd05142009-01-20 20:43:58 +00002898 }
Alexandre Vassalottie4bccb72009-01-24 01:47:57 +00002899 /* XXX: Should the base argument explicitly set to 10? */
2900 value = PyLong_FromString(s, NULL, 0);
Mark Dickinson8dd05142009-01-20 20:43:58 +00002901 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002902 return -1;
2903
2904 PDATA_PUSH(self->stack, value, -1);
2905 return 0;
2906}
2907
2908/* 'size' bytes contain the # of bytes of little-endian 256's-complement
2909 * data following.
2910 */
2911static int
2912load_counted_long(UnpicklerObject *self, int size)
2913{
2914 PyObject *value;
2915 char *nbytes;
2916 char *pdata;
2917
2918 assert(size == 1 || size == 4);
2919 if (unpickler_read(self, &nbytes, size) < 0)
2920 return -1;
2921
2922 size = calc_binint(nbytes, size);
2923 if (size < 0) {
2924 /* Corrupt or hostile pickle -- we never write one like this */
2925 PyErr_SetString(UnpicklingError,
2926 "LONG pickle has negative byte count");
2927 return -1;
2928 }
2929
2930 if (size == 0)
2931 value = PyLong_FromLong(0L);
2932 else {
2933 /* Read the raw little-endian bytes and convert. */
2934 if (unpickler_read(self, &pdata, size) < 0)
2935 return -1;
2936 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
2937 1 /* little endian */ , 1 /* signed */ );
2938 }
2939 if (value == NULL)
2940 return -1;
2941 PDATA_PUSH(self->stack, value, -1);
2942 return 0;
2943}
2944
2945static int
2946load_float(UnpicklerObject *self)
2947{
2948 PyObject *value;
2949 char *endptr, *s;
2950 Py_ssize_t len;
2951 double d;
2952
2953 if ((len = unpickler_readline(self, &s)) < 0)
2954 return -1;
2955 if (len < 2)
2956 return bad_readline();
2957
2958 errno = 0;
2959 d = PyOS_ascii_strtod(s, &endptr);
2960
Mark Dickinsoncddcf442009-01-24 21:46:33 +00002961 if ((errno == ERANGE && !(fabs(d) <= 1.0)) ||
2962 (endptr[0] != '\n') || (endptr[1] != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002963 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
2964 return -1;
2965 }
2966
2967 if ((value = PyFloat_FromDouble(d)) == NULL)
2968 return -1;
2969
2970 PDATA_PUSH(self->stack, value, -1);
2971 return 0;
2972}
2973
2974static int
2975load_binfloat(UnpicklerObject *self)
2976{
2977 PyObject *value;
2978 double x;
2979 char *s;
2980
2981 if (unpickler_read(self, &s, 8) < 0)
2982 return -1;
2983
2984 x = _PyFloat_Unpack8((unsigned char *)s, 0);
2985 if (x == -1.0 && PyErr_Occurred())
2986 return -1;
2987
2988 if ((value = PyFloat_FromDouble(x)) == NULL)
2989 return -1;
2990
2991 PDATA_PUSH(self->stack, value, -1);
2992 return 0;
2993}
2994
2995static int
2996load_string(UnpicklerObject *self)
2997{
2998 PyObject *bytes;
2999 PyObject *str = NULL;
3000 Py_ssize_t len;
3001 char *s, *p;
3002
3003 if ((len = unpickler_readline(self, &s)) < 0)
3004 return -1;
3005 if (len < 3)
3006 return bad_readline();
3007 if ((s = strdup(s)) == NULL) {
3008 PyErr_NoMemory();
3009 return -1;
3010 }
3011
3012 /* Strip outermost quotes */
3013 while (s[len - 1] <= ' ')
3014 len--;
3015 if (s[0] == '"' && s[len - 1] == '"') {
3016 s[len - 1] = '\0';
3017 p = s + 1;
3018 len -= 2;
3019 }
3020 else if (s[0] == '\'' && s[len - 1] == '\'') {
3021 s[len - 1] = '\0';
3022 p = s + 1;
3023 len -= 2;
3024 }
3025 else {
3026 free(s);
3027 PyErr_SetString(PyExc_ValueError, "insecure string pickle");
3028 return -1;
3029 }
3030
3031 /* Use the PyBytes API to decode the string, since that is what is used
3032 to encode, and then coerce the result to Unicode. */
3033 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
3034 free(s);
3035 if (bytes == NULL)
3036 return -1;
3037 str = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
3038 Py_DECREF(bytes);
3039 if (str == NULL)
3040 return -1;
3041
3042 PDATA_PUSH(self->stack, str, -1);
3043 return 0;
3044}
3045
3046static int
3047load_binbytes(UnpicklerObject *self)
3048{
3049 PyObject *bytes;
3050 long x;
3051 char *s;
3052
3053 if (unpickler_read(self, &s, 4) < 0)
3054 return -1;
3055
3056 x = calc_binint(s, 4);
3057 if (x < 0) {
3058 PyErr_SetString(UnpicklingError,
3059 "BINBYTES pickle has negative byte count");
3060 return -1;
3061 }
3062
3063 if (unpickler_read(self, &s, x) < 0)
3064 return -1;
3065 bytes = PyBytes_FromStringAndSize(s, x);
3066 if (bytes == NULL)
3067 return -1;
3068
3069 PDATA_PUSH(self->stack, bytes, -1);
3070 return 0;
3071}
3072
3073static int
3074load_short_binbytes(UnpicklerObject *self)
3075{
3076 PyObject *bytes;
3077 unsigned char x;
3078 char *s;
3079
3080 if (unpickler_read(self, &s, 1) < 0)
3081 return -1;
3082
3083 x = (unsigned char)s[0];
3084
3085 if (unpickler_read(self, &s, x) < 0)
3086 return -1;
3087
3088 bytes = PyBytes_FromStringAndSize(s, x);
3089 if (bytes == NULL)
3090 return -1;
3091
3092 PDATA_PUSH(self->stack, bytes, -1);
3093 return 0;
3094}
3095
3096static int
3097load_binstring(UnpicklerObject *self)
3098{
3099 PyObject *str;
3100 long x;
3101 char *s;
3102
3103 if (unpickler_read(self, &s, 4) < 0)
3104 return -1;
3105
3106 x = calc_binint(s, 4);
3107 if (x < 0) {
3108 PyErr_SetString(UnpicklingError,
3109 "BINSTRING pickle has negative byte count");
3110 return -1;
3111 }
3112
3113 if (unpickler_read(self, &s, x) < 0)
3114 return -1;
3115
3116 /* Convert Python 2.x strings to unicode. */
3117 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
3118 if (str == NULL)
3119 return -1;
3120
3121 PDATA_PUSH(self->stack, str, -1);
3122 return 0;
3123}
3124
3125static int
3126load_short_binstring(UnpicklerObject *self)
3127{
3128 PyObject *str;
3129 unsigned char x;
3130 char *s;
3131
3132 if (unpickler_read(self, &s, 1) < 0)
3133 return -1;
3134
3135 x = (unsigned char)s[0];
3136
3137 if (unpickler_read(self, &s, x) < 0)
3138 return -1;
3139
3140 /* Convert Python 2.x strings to unicode. */
3141 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
3142 if (str == NULL)
3143 return -1;
3144
3145 PDATA_PUSH(self->stack, str, -1);
3146 return 0;
3147}
3148
3149static int
3150load_unicode(UnpicklerObject *self)
3151{
3152 PyObject *str;
3153 Py_ssize_t len;
3154 char *s;
3155
3156 if ((len = unpickler_readline(self, &s)) < 0)
3157 return -1;
3158 if (len < 1)
3159 return bad_readline();
3160
3161 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
3162 if (str == NULL)
3163 return -1;
3164
3165 PDATA_PUSH(self->stack, str, -1);
3166 return 0;
3167}
3168
3169static int
3170load_binunicode(UnpicklerObject *self)
3171{
3172 PyObject *str;
3173 long size;
3174 char *s;
3175
3176 if (unpickler_read(self, &s, 4) < 0)
3177 return -1;
3178
3179 size = calc_binint(s, 4);
3180 if (size < 0) {
3181 PyErr_SetString(UnpicklingError,
3182 "BINUNICODE pickle has negative byte count");
3183 return -1;
3184 }
3185
3186 if (unpickler_read(self, &s, size) < 0)
3187 return -1;
3188
3189 str = PyUnicode_DecodeUTF8(s, size, NULL);
3190 if (str == NULL)
3191 return -1;
3192
3193 PDATA_PUSH(self->stack, str, -1);
3194 return 0;
3195}
3196
3197static int
3198load_tuple(UnpicklerObject *self)
3199{
3200 PyObject *tuple;
3201 int i;
3202
3203 if ((i = marker(self)) < 0)
3204 return -1;
3205
3206 tuple = Pdata_poptuple(self->stack, i);
3207 if (tuple == NULL)
3208 return -1;
3209 PDATA_PUSH(self->stack, tuple, -1);
3210 return 0;
3211}
3212
3213static int
3214load_counted_tuple(UnpicklerObject *self, int len)
3215{
3216 PyObject *tuple;
3217
3218 tuple = PyTuple_New(len);
3219 if (tuple == NULL)
3220 return -1;
3221
3222 while (--len >= 0) {
3223 PyObject *item;
3224
3225 PDATA_POP(self->stack, item);
3226 if (item == NULL)
3227 return -1;
3228 PyTuple_SET_ITEM(tuple, len, item);
3229 }
3230 PDATA_PUSH(self->stack, tuple, -1);
3231 return 0;
3232}
3233
3234static int
3235load_empty_list(UnpicklerObject *self)
3236{
3237 PyObject *list;
3238
3239 if ((list = PyList_New(0)) == NULL)
3240 return -1;
3241 PDATA_PUSH(self->stack, list, -1);
3242 return 0;
3243}
3244
3245static int
3246load_empty_dict(UnpicklerObject *self)
3247{
3248 PyObject *dict;
3249
3250 if ((dict = PyDict_New()) == NULL)
3251 return -1;
3252 PDATA_PUSH(self->stack, dict, -1);
3253 return 0;
3254}
3255
3256static int
3257load_list(UnpicklerObject *self)
3258{
3259 PyObject *list;
3260 int i;
3261
3262 if ((i = marker(self)) < 0)
3263 return -1;
3264
3265 list = Pdata_poplist(self->stack, i);
3266 if (list == NULL)
3267 return -1;
3268 PDATA_PUSH(self->stack, list, -1);
3269 return 0;
3270}
3271
3272static int
3273load_dict(UnpicklerObject *self)
3274{
3275 PyObject *dict, *key, *value;
3276 int i, j, k;
3277
3278 if ((i = marker(self)) < 0)
3279 return -1;
3280 j = self->stack->length;
3281
3282 if ((dict = PyDict_New()) == NULL)
3283 return -1;
3284
3285 for (k = i + 1; k < j; k += 2) {
3286 key = self->stack->data[k - 1];
3287 value = self->stack->data[k];
3288 if (PyDict_SetItem(dict, key, value) < 0) {
3289 Py_DECREF(dict);
3290 return -1;
3291 }
3292 }
3293 Pdata_clear(self->stack, i);
3294 PDATA_PUSH(self->stack, dict, -1);
3295 return 0;
3296}
3297
3298static PyObject *
3299instantiate(PyObject *cls, PyObject *args)
3300{
3301 PyObject *r = NULL;
3302
3303 /* XXX: The pickle.py module does not create instances this way when the
3304 args tuple is empty. See Unpickler._instantiate(). */
3305 if ((r = PyObject_CallObject(cls, args)))
3306 return r;
3307
3308 /* XXX: Is this still nescessary? */
3309 {
3310 PyObject *tp, *v, *tb, *tmp_value;
3311
3312 PyErr_Fetch(&tp, &v, &tb);
3313 tmp_value = v;
3314 /* NULL occurs when there was a KeyboardInterrupt */
3315 if (tmp_value == NULL)
3316 tmp_value = Py_None;
3317 if ((r = PyTuple_Pack(3, tmp_value, cls, args))) {
3318 Py_XDECREF(v);
3319 v = r;
3320 }
3321 PyErr_Restore(tp, v, tb);
3322 }
3323 return NULL;
3324}
3325
3326static int
3327load_obj(UnpicklerObject *self)
3328{
3329 PyObject *cls, *args, *obj = NULL;
3330 int i;
3331
3332 if ((i = marker(self)) < 0)
3333 return -1;
3334
3335 args = Pdata_poptuple(self->stack, i + 1);
3336 if (args == NULL)
3337 return -1;
3338
3339 PDATA_POP(self->stack, cls);
3340 if (cls) {
3341 obj = instantiate(cls, args);
3342 Py_DECREF(cls);
3343 }
3344 Py_DECREF(args);
3345 if (obj == NULL)
3346 return -1;
3347
3348 PDATA_PUSH(self->stack, obj, -1);
3349 return 0;
3350}
3351
3352static int
3353load_inst(UnpicklerObject *self)
3354{
3355 PyObject *cls = NULL;
3356 PyObject *args = NULL;
3357 PyObject *obj = NULL;
3358 PyObject *module_name;
3359 PyObject *class_name;
3360 Py_ssize_t len;
3361 int i;
3362 char *s;
3363
3364 if ((i = marker(self)) < 0)
3365 return -1;
3366 if ((len = unpickler_readline(self, &s)) < 0)
3367 return -1;
3368 if (len < 2)
3369 return bad_readline();
3370
3371 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
3372 identifiers are permitted in Python 3.0, since the INST opcode is only
3373 supported by older protocols on Python 2.x. */
3374 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
3375 if (module_name == NULL)
3376 return -1;
3377
3378 if ((len = unpickler_readline(self, &s)) >= 0) {
3379 if (len < 2)
3380 return bad_readline();
3381 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
3382 if (class_name == NULL) {
3383 cls = find_class(self, module_name, class_name);
3384 Py_DECREF(class_name);
3385 }
3386 }
3387 Py_DECREF(module_name);
3388
3389 if (cls == NULL)
3390 return -1;
3391
3392 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
3393 obj = instantiate(cls, args);
3394 Py_DECREF(args);
3395 }
3396 Py_DECREF(cls);
3397
3398 if (obj == NULL)
3399 return -1;
3400
3401 PDATA_PUSH(self->stack, obj, -1);
3402 return 0;
3403}
3404
3405static int
3406load_newobj(UnpicklerObject *self)
3407{
3408 PyObject *args = NULL;
3409 PyObject *clsraw = NULL;
3410 PyTypeObject *cls; /* clsraw cast to its true type */
3411 PyObject *obj;
3412
3413 /* Stack is ... cls argtuple, and we want to call
3414 * cls.__new__(cls, *argtuple).
3415 */
3416 PDATA_POP(self->stack, args);
3417 if (args == NULL)
3418 goto error;
3419 if (!PyTuple_Check(args)) {
3420 PyErr_SetString(UnpicklingError, "NEWOBJ expected an arg " "tuple.");
3421 goto error;
3422 }
3423
3424 PDATA_POP(self->stack, clsraw);
3425 cls = (PyTypeObject *)clsraw;
3426 if (cls == NULL)
3427 goto error;
3428 if (!PyType_Check(cls)) {
3429 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
3430 "isn't a type object");
3431 goto error;
3432 }
3433 if (cls->tp_new == NULL) {
3434 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
3435 "has NULL tp_new");
3436 goto error;
3437 }
3438
3439 /* Call __new__. */
3440 obj = cls->tp_new(cls, args, NULL);
3441 if (obj == NULL)
3442 goto error;
3443
3444 Py_DECREF(args);
3445 Py_DECREF(clsraw);
3446 PDATA_PUSH(self->stack, obj, -1);
3447 return 0;
3448
3449 error:
3450 Py_XDECREF(args);
3451 Py_XDECREF(clsraw);
3452 return -1;
3453}
3454
3455static int
3456load_global(UnpicklerObject *self)
3457{
3458 PyObject *global = NULL;
3459 PyObject *module_name;
3460 PyObject *global_name;
3461 Py_ssize_t len;
3462 char *s;
3463
3464 if ((len = unpickler_readline(self, &s)) < 0)
3465 return -1;
3466 if (len < 2)
3467 return bad_readline();
3468 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
3469 if (!module_name)
3470 return -1;
3471
3472 if ((len = unpickler_readline(self, &s)) >= 0) {
3473 if (len < 2) {
3474 Py_DECREF(module_name);
3475 return bad_readline();
3476 }
3477 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
3478 if (global_name) {
3479 global = find_class(self, module_name, global_name);
3480 Py_DECREF(global_name);
3481 }
3482 }
3483 Py_DECREF(module_name);
3484
3485 if (global == NULL)
3486 return -1;
3487 PDATA_PUSH(self->stack, global, -1);
3488 return 0;
3489}
3490
3491static int
3492load_persid(UnpicklerObject *self)
3493{
3494 PyObject *pid;
3495 Py_ssize_t len;
3496 char *s;
3497
3498 if (self->pers_func) {
3499 if ((len = unpickler_readline(self, &s)) < 0)
3500 return -1;
3501 if (len < 2)
3502 return bad_readline();
3503
3504 pid = PyBytes_FromStringAndSize(s, len - 1);
3505 if (pid == NULL)
3506 return -1;
3507
3508 /* Ugh... this does not leak since unpickler_call() steals the
3509 reference to pid first. */
3510 pid = unpickler_call(self, self->pers_func, pid);
3511 if (pid == NULL)
3512 return -1;
3513
3514 PDATA_PUSH(self->stack, pid, -1);
3515 return 0;
3516 }
3517 else {
3518 PyErr_SetString(UnpicklingError,
3519 "A load persistent id instruction was encountered,\n"
3520 "but no persistent_load function was specified.");
3521 return -1;
3522 }
3523}
3524
3525static int
3526load_binpersid(UnpicklerObject *self)
3527{
3528 PyObject *pid;
3529
3530 if (self->pers_func) {
3531 PDATA_POP(self->stack, pid);
3532 if (pid == NULL)
3533 return -1;
3534
3535 /* Ugh... this does not leak since unpickler_call() steals the
3536 reference to pid first. */
3537 pid = unpickler_call(self, self->pers_func, pid);
3538 if (pid == NULL)
3539 return -1;
3540
3541 PDATA_PUSH(self->stack, pid, -1);
3542 return 0;
3543 }
3544 else {
3545 PyErr_SetString(UnpicklingError,
3546 "A load persistent id instruction was encountered,\n"
3547 "but no persistent_load function was specified.");
3548 return -1;
3549 }
3550}
3551
3552static int
3553load_pop(UnpicklerObject *self)
3554{
3555 int len;
3556
3557 if ((len = self->stack->length) <= 0)
3558 return stack_underflow();
3559
3560 /* Note that we split the (pickle.py) stack into two stacks,
3561 * an object stack and a mark stack. We have to be clever and
3562 * pop the right one. We do this by looking at the top of the
3563 * mark stack.
3564 */
3565
3566 if ((self->num_marks > 0) && (self->marks[self->num_marks - 1] == len))
3567 self->num_marks--;
3568 else {
3569 len--;
3570 Py_DECREF(self->stack->data[len]);
3571 self->stack->length = len;
3572 }
3573
3574 return 0;
3575}
3576
3577static int
3578load_pop_mark(UnpicklerObject *self)
3579{
3580 int i;
3581
3582 if ((i = marker(self)) < 0)
3583 return -1;
3584
3585 Pdata_clear(self->stack, i);
3586
3587 return 0;
3588}
3589
3590static int
3591load_dup(UnpicklerObject *self)
3592{
3593 PyObject *last;
3594 int len;
3595
3596 if ((len = self->stack->length) <= 0)
3597 return stack_underflow();
3598 last = self->stack->data[len - 1];
3599 PDATA_APPEND(self->stack, last, -1);
3600 return 0;
3601}
3602
3603static int
3604load_get(UnpicklerObject *self)
3605{
3606 PyObject *key, *value;
3607 Py_ssize_t len;
3608 char *s;
3609
3610 if ((len = unpickler_readline(self, &s)) < 0)
3611 return -1;
3612 if (len < 2)
3613 return bad_readline();
3614
3615 key = PyLong_FromString(s, NULL, 10);
3616 if (key == NULL)
3617 return -1;
3618
3619 value = PyDict_GetItemWithError(self->memo, key);
3620 if (value == NULL) {
3621 if (!PyErr_Occurred())
3622 PyErr_SetObject(PyExc_KeyError, key);
3623 Py_DECREF(key);
3624 return -1;
3625 }
3626 Py_DECREF(key);
3627
3628 PDATA_APPEND(self->stack, value, -1);
3629 return 0;
3630}
3631
3632static int
3633load_binget(UnpicklerObject *self)
3634{
3635 PyObject *key, *value;
3636 char *s;
3637
3638 if (unpickler_read(self, &s, 1) < 0)
3639 return -1;
3640
3641 /* Here, the unsigned cast is necessary to avoid negative values. */
3642 key = PyLong_FromLong((long)(unsigned char)s[0]);
3643 if (key == NULL)
3644 return -1;
3645
3646 value = PyDict_GetItemWithError(self->memo, key);
3647 if (value == NULL) {
3648 if (!PyErr_Occurred())
3649 PyErr_SetObject(PyExc_KeyError, key);
3650 Py_DECREF(key);
3651 return -1;
3652 }
3653 Py_DECREF(key);
3654
3655 PDATA_APPEND(self->stack, value, -1);
3656 return 0;
3657}
3658
3659static int
3660load_long_binget(UnpicklerObject *self)
3661{
3662 PyObject *key, *value;
3663 char *s;
3664 long k;
3665
3666 if (unpickler_read(self, &s, 4) < 0)
3667 return -1;
3668
3669 k = (long)(unsigned char)s[0];
3670 k |= (long)(unsigned char)s[1] << 8;
3671 k |= (long)(unsigned char)s[2] << 16;
3672 k |= (long)(unsigned char)s[3] << 24;
3673
3674 key = PyLong_FromLong(k);
3675 if (key == NULL)
3676 return -1;
3677
3678 value = PyDict_GetItemWithError(self->memo, key);
3679 if (value == NULL) {
3680 if (!PyErr_Occurred())
3681 PyErr_SetObject(PyExc_KeyError, key);
3682 Py_DECREF(key);
3683 return -1;
3684 }
3685 Py_DECREF(key);
3686
3687 PDATA_APPEND(self->stack, value, -1);
3688 return 0;
3689}
3690
3691/* Push an object from the extension registry (EXT[124]). nbytes is
3692 * the number of bytes following the opcode, holding the index (code) value.
3693 */
3694static int
3695load_extension(UnpicklerObject *self, int nbytes)
3696{
3697 char *codebytes; /* the nbytes bytes after the opcode */
3698 long code; /* calc_binint returns long */
3699 PyObject *py_code; /* code as a Python int */
3700 PyObject *obj; /* the object to push */
3701 PyObject *pair; /* (module_name, class_name) */
3702 PyObject *module_name, *class_name;
3703
3704 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
3705 if (unpickler_read(self, &codebytes, nbytes) < 0)
3706 return -1;
3707 code = calc_binint(codebytes, nbytes);
3708 if (code <= 0) { /* note that 0 is forbidden */
3709 /* Corrupt or hostile pickle. */
3710 PyErr_SetString(UnpicklingError, "EXT specifies code <= 0");
3711 return -1;
3712 }
3713
3714 /* Look for the code in the cache. */
3715 py_code = PyLong_FromLong(code);
3716 if (py_code == NULL)
3717 return -1;
3718 obj = PyDict_GetItem(extension_cache, py_code);
3719 if (obj != NULL) {
3720 /* Bingo. */
3721 Py_DECREF(py_code);
3722 PDATA_APPEND(self->stack, obj, -1);
3723 return 0;
3724 }
3725
3726 /* Look up the (module_name, class_name) pair. */
3727 pair = PyDict_GetItem(inverted_registry, py_code);
3728 if (pair == NULL) {
3729 Py_DECREF(py_code);
3730 PyErr_Format(PyExc_ValueError, "unregistered extension "
3731 "code %ld", code);
3732 return -1;
3733 }
3734 /* Since the extension registry is manipulable via Python code,
3735 * confirm that pair is really a 2-tuple of strings.
3736 */
3737 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2 ||
3738 !PyUnicode_Check(module_name = PyTuple_GET_ITEM(pair, 0)) ||
3739 !PyUnicode_Check(class_name = PyTuple_GET_ITEM(pair, 1))) {
3740 Py_DECREF(py_code);
3741 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
3742 "isn't a 2-tuple of strings", code);
3743 return -1;
3744 }
3745 /* Load the object. */
3746 obj = find_class(self, module_name, class_name);
3747 if (obj == NULL) {
3748 Py_DECREF(py_code);
3749 return -1;
3750 }
3751 /* Cache code -> obj. */
3752 code = PyDict_SetItem(extension_cache, py_code, obj);
3753 Py_DECREF(py_code);
3754 if (code < 0) {
3755 Py_DECREF(obj);
3756 return -1;
3757 }
3758 PDATA_PUSH(self->stack, obj, -1);
3759 return 0;
3760}
3761
3762static int
3763load_put(UnpicklerObject *self)
3764{
3765 PyObject *key, *value;
3766 Py_ssize_t len;
3767 char *s;
3768 int x;
3769
3770 if ((len = unpickler_readline(self, &s)) < 0)
3771 return -1;
3772 if (len < 2)
3773 return bad_readline();
3774 if ((x = self->stack->length) <= 0)
3775 return stack_underflow();
3776
3777 key = PyLong_FromString(s, NULL, 10);
3778 if (key == NULL)
3779 return -1;
3780 value = self->stack->data[x - 1];
3781
3782 x = PyDict_SetItem(self->memo, key, value);
3783 Py_DECREF(key);
3784 return x;
3785}
3786
3787static int
3788load_binput(UnpicklerObject *self)
3789{
3790 PyObject *key, *value;
3791 char *s;
3792 int x;
3793
3794 if (unpickler_read(self, &s, 1) < 0)
3795 return -1;
3796 if ((x = self->stack->length) <= 0)
3797 return stack_underflow();
3798
3799 key = PyLong_FromLong((long)(unsigned char)s[0]);
3800 if (key == NULL)
3801 return -1;
3802 value = self->stack->data[x - 1];
3803
3804 x = PyDict_SetItem(self->memo, key, value);
3805 Py_DECREF(key);
3806 return x;
3807}
3808
3809static int
3810load_long_binput(UnpicklerObject *self)
3811{
3812 PyObject *key, *value;
3813 long k;
3814 char *s;
3815 int x;
3816
3817 if (unpickler_read(self, &s, 4) < 0)
3818 return -1;
3819 if ((x = self->stack->length) <= 0)
3820 return stack_underflow();
3821
3822 k = (long)(unsigned char)s[0];
3823 k |= (long)(unsigned char)s[1] << 8;
3824 k |= (long)(unsigned char)s[2] << 16;
3825 k |= (long)(unsigned char)s[3] << 24;
3826
3827 key = PyLong_FromLong(k);
3828 if (key == NULL)
3829 return -1;
3830 value = self->stack->data[x - 1];
3831
3832 x = PyDict_SetItem(self->memo, key, value);
3833 Py_DECREF(key);
3834 return x;
3835}
3836
3837static int
3838do_append(UnpicklerObject *self, int x)
3839{
3840 PyObject *value;
3841 PyObject *list;
3842 int len, i;
3843
3844 len = self->stack->length;
3845 if (x > len || x <= 0)
3846 return stack_underflow();
3847 if (len == x) /* nothing to do */
3848 return 0;
3849
3850 list = self->stack->data[x - 1];
3851
3852 if (PyList_Check(list)) {
3853 PyObject *slice;
3854 Py_ssize_t list_len;
3855
3856 slice = Pdata_poplist(self->stack, x);
3857 if (!slice)
3858 return -1;
3859 list_len = PyList_GET_SIZE(list);
3860 i = PyList_SetSlice(list, list_len, list_len, slice);
3861 Py_DECREF(slice);
3862 return i;
3863 }
3864 else {
3865 PyObject *append_func;
3866
3867 append_func = PyObject_GetAttrString(list, "append");
3868 if (append_func == NULL)
3869 return -1;
3870 for (i = x; i < len; i++) {
3871 PyObject *result;
3872
3873 value = self->stack->data[i];
3874 result = unpickler_call(self, append_func, value);
3875 if (result == NULL) {
3876 Pdata_clear(self->stack, i + 1);
3877 self->stack->length = x;
3878 return -1;
3879 }
3880 Py_DECREF(result);
3881 }
3882 self->stack->length = x;
3883 }
3884
3885 return 0;
3886}
3887
3888static int
3889load_append(UnpicklerObject *self)
3890{
3891 return do_append(self, self->stack->length - 1);
3892}
3893
3894static int
3895load_appends(UnpicklerObject *self)
3896{
3897 return do_append(self, marker(self));
3898}
3899
3900static int
3901do_setitems(UnpicklerObject *self, int x)
3902{
3903 PyObject *value, *key;
3904 PyObject *dict;
3905 int len, i;
3906 int status = 0;
3907
3908 len = self->stack->length;
3909 if (x > len || x <= 0)
3910 return stack_underflow();
3911 if (len == x) /* nothing to do */
3912 return 0;
3913 if ((len - x) % 2 != 0) {
3914 /* Currupt or hostile pickle -- we never write one like this. */
3915 PyErr_SetString(UnpicklingError, "odd number of items for SETITEMS");
3916 return -1;
3917 }
3918
3919 /* Here, dict does not actually need to be a PyDict; it could be anything
3920 that supports the __setitem__ attribute. */
3921 dict = self->stack->data[x - 1];
3922
3923 for (i = x + 1; i < len; i += 2) {
3924 key = self->stack->data[i - 1];
3925 value = self->stack->data[i];
3926 if (PyObject_SetItem(dict, key, value) < 0) {
3927 status = -1;
3928 break;
3929 }
3930 }
3931
3932 Pdata_clear(self->stack, x);
3933 return status;
3934}
3935
3936static int
3937load_setitem(UnpicklerObject *self)
3938{
3939 return do_setitems(self, self->stack->length - 2);
3940}
3941
3942static int
3943load_setitems(UnpicklerObject *self)
3944{
3945 return do_setitems(self, marker(self));
3946}
3947
3948static int
3949load_build(UnpicklerObject *self)
3950{
3951 PyObject *state, *inst, *slotstate;
3952 PyObject *setstate;
3953 int status = 0;
3954
3955 /* Stack is ... instance, state. We want to leave instance at
3956 * the stack top, possibly mutated via instance.__setstate__(state).
3957 */
3958 if (self->stack->length < 2)
3959 return stack_underflow();
3960
3961 PDATA_POP(self->stack, state);
3962 if (state == NULL)
3963 return -1;
3964
3965 inst = self->stack->data[self->stack->length - 1];
3966
3967 setstate = PyObject_GetAttrString(inst, "__setstate__");
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00003968 if (setstate == NULL) {
3969 if (PyErr_ExceptionMatches(PyExc_AttributeError))
3970 PyErr_Clear();
Antoine Pitroud79dc622008-09-05 00:03:33 +00003971 else {
3972 Py_DECREF(state);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00003973 return -1;
Antoine Pitroud79dc622008-09-05 00:03:33 +00003974 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003975 }
3976 else {
3977 PyObject *result;
3978
3979 /* The explicit __setstate__ is responsible for everything. */
Antoine Pitroud79dc622008-09-05 00:03:33 +00003980 /* Ugh... this does not leak since unpickler_call() steals the
3981 reference to state first. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003982 result = unpickler_call(self, setstate, state);
3983 Py_DECREF(setstate);
3984 if (result == NULL)
3985 return -1;
3986 Py_DECREF(result);
3987 return 0;
3988 }
3989
3990 /* A default __setstate__. First see whether state embeds a
3991 * slot state dict too (a proto 2 addition).
3992 */
3993 if (PyTuple_Check(state) && Py_SIZE(state) == 2) {
3994 PyObject *tmp = state;
3995
3996 state = PyTuple_GET_ITEM(tmp, 0);
3997 slotstate = PyTuple_GET_ITEM(tmp, 1);
3998 Py_INCREF(state);
3999 Py_INCREF(slotstate);
4000 Py_DECREF(tmp);
4001 }
4002 else
4003 slotstate = NULL;
4004
4005 /* Set inst.__dict__ from the state dict (if any). */
4006 if (state != Py_None) {
4007 PyObject *dict;
4008
4009 if (!PyDict_Check(state)) {
4010 PyErr_SetString(UnpicklingError, "state is not a dictionary");
4011 goto error;
4012 }
4013 dict = PyObject_GetAttrString(inst, "__dict__");
4014 if (dict == NULL)
4015 goto error;
4016
4017 PyDict_Update(dict, state);
4018 Py_DECREF(dict);
4019 }
4020
4021 /* Also set instance attributes from the slotstate dict (if any). */
4022 if (slotstate != NULL) {
4023 PyObject *d_key, *d_value;
4024 Py_ssize_t i;
4025
4026 if (!PyDict_Check(slotstate)) {
4027 PyErr_SetString(UnpicklingError,
4028 "slot state is not a dictionary");
4029 goto error;
4030 }
4031 i = 0;
4032 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
4033 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
4034 goto error;
4035 }
4036 }
4037
4038 if (0) {
4039 error:
4040 status = -1;
4041 }
4042
4043 Py_DECREF(state);
4044 Py_XDECREF(slotstate);
4045 return status;
4046}
4047
4048static int
4049load_mark(UnpicklerObject *self)
4050{
4051
4052 /* Note that we split the (pickle.py) stack into two stacks, an
4053 * object stack and a mark stack. Here we push a mark onto the
4054 * mark stack.
4055 */
4056
4057 if ((self->num_marks + 1) >= self->marks_size) {
4058 size_t alloc;
4059 int *marks;
4060
4061 /* Use the size_t type to check for overflow. */
4062 alloc = ((size_t)self->num_marks << 1) + 20;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00004063 if (alloc > PY_SSIZE_T_MAX ||
4064 alloc <= ((size_t)self->num_marks + 1)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004065 PyErr_NoMemory();
4066 return -1;
4067 }
4068
4069 if (self->marks == NULL)
4070 marks = (int *)PyMem_Malloc(alloc * sizeof(int));
4071 else
4072 marks = (int *)PyMem_Realloc(self->marks, alloc * sizeof(int));
4073 if (marks == NULL) {
4074 PyErr_NoMemory();
4075 return -1;
4076 }
4077 self->marks = marks;
4078 self->marks_size = (Py_ssize_t)alloc;
4079 }
4080
4081 self->marks[self->num_marks++] = self->stack->length;
4082
4083 return 0;
4084}
4085
4086static int
4087load_reduce(UnpicklerObject *self)
4088{
4089 PyObject *callable = NULL;
4090 PyObject *argtup = NULL;
4091 PyObject *obj = NULL;
4092
4093 PDATA_POP(self->stack, argtup);
4094 if (argtup == NULL)
4095 return -1;
4096 PDATA_POP(self->stack, callable);
4097 if (callable) {
4098 obj = instantiate(callable, argtup);
4099 Py_DECREF(callable);
4100 }
4101 Py_DECREF(argtup);
4102
4103 if (obj == NULL)
4104 return -1;
4105
4106 PDATA_PUSH(self->stack, obj, -1);
4107 return 0;
4108}
4109
4110/* Just raises an error if we don't know the protocol specified. PROTO
4111 * is the first opcode for protocols >= 2.
4112 */
4113static int
4114load_proto(UnpicklerObject *self)
4115{
4116 char *s;
4117 int i;
4118
4119 if (unpickler_read(self, &s, 1) < 0)
4120 return -1;
4121
4122 i = (unsigned char)s[0];
4123 if (i <= HIGHEST_PROTOCOL)
4124 return 0;
4125
4126 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
4127 return -1;
4128}
4129
4130static PyObject *
4131load(UnpicklerObject *self)
4132{
4133 PyObject *err;
4134 PyObject *value = NULL;
4135 char *s;
4136
4137 self->num_marks = 0;
4138 if (self->stack->length)
4139 Pdata_clear(self->stack, 0);
4140
4141 /* Convenient macros for the dispatch while-switch loop just below. */
4142#define OP(opcode, load_func) \
4143 case opcode: if (load_func(self) < 0) break; continue;
4144
4145#define OP_ARG(opcode, load_func, arg) \
4146 case opcode: if (load_func(self, (arg)) < 0) break; continue;
4147
4148 while (1) {
4149 if (unpickler_read(self, &s, 1) < 0)
4150 break;
4151
4152 switch ((enum opcode)s[0]) {
4153 OP(NONE, load_none)
4154 OP(BININT, load_binint)
4155 OP(BININT1, load_binint1)
4156 OP(BININT2, load_binint2)
4157 OP(INT, load_int)
4158 OP(LONG, load_long)
4159 OP_ARG(LONG1, load_counted_long, 1)
4160 OP_ARG(LONG4, load_counted_long, 4)
4161 OP(FLOAT, load_float)
4162 OP(BINFLOAT, load_binfloat)
4163 OP(BINBYTES, load_binbytes)
4164 OP(SHORT_BINBYTES, load_short_binbytes)
4165 OP(BINSTRING, load_binstring)
4166 OP(SHORT_BINSTRING, load_short_binstring)
4167 OP(STRING, load_string)
4168 OP(UNICODE, load_unicode)
4169 OP(BINUNICODE, load_binunicode)
4170 OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
4171 OP_ARG(TUPLE1, load_counted_tuple, 1)
4172 OP_ARG(TUPLE2, load_counted_tuple, 2)
4173 OP_ARG(TUPLE3, load_counted_tuple, 3)
4174 OP(TUPLE, load_tuple)
4175 OP(EMPTY_LIST, load_empty_list)
4176 OP(LIST, load_list)
4177 OP(EMPTY_DICT, load_empty_dict)
4178 OP(DICT, load_dict)
4179 OP(OBJ, load_obj)
4180 OP(INST, load_inst)
4181 OP(NEWOBJ, load_newobj)
4182 OP(GLOBAL, load_global)
4183 OP(APPEND, load_append)
4184 OP(APPENDS, load_appends)
4185 OP(BUILD, load_build)
4186 OP(DUP, load_dup)
4187 OP(BINGET, load_binget)
4188 OP(LONG_BINGET, load_long_binget)
4189 OP(GET, load_get)
4190 OP(MARK, load_mark)
4191 OP(BINPUT, load_binput)
4192 OP(LONG_BINPUT, load_long_binput)
4193 OP(PUT, load_put)
4194 OP(POP, load_pop)
4195 OP(POP_MARK, load_pop_mark)
4196 OP(SETITEM, load_setitem)
4197 OP(SETITEMS, load_setitems)
4198 OP(PERSID, load_persid)
4199 OP(BINPERSID, load_binpersid)
4200 OP(REDUCE, load_reduce)
4201 OP(PROTO, load_proto)
4202 OP_ARG(EXT1, load_extension, 1)
4203 OP_ARG(EXT2, load_extension, 2)
4204 OP_ARG(EXT4, load_extension, 4)
4205 OP_ARG(NEWTRUE, load_bool, Py_True)
4206 OP_ARG(NEWFALSE, load_bool, Py_False)
4207
4208 case STOP:
4209 break;
4210
4211 case '\0':
4212 PyErr_SetNone(PyExc_EOFError);
4213 return NULL;
4214
4215 default:
4216 PyErr_Format(UnpicklingError,
4217 "invalid load key, '%c'.", s[0]);
4218 return NULL;
4219 }
4220
4221 break; /* and we are done! */
4222 }
4223
4224 /* XXX: It is not clear what this is actually for. */
4225 if ((err = PyErr_Occurred())) {
4226 if (err == PyExc_EOFError) {
4227 PyErr_SetNone(PyExc_EOFError);
4228 }
4229 return NULL;
4230 }
4231
4232 PDATA_POP(self->stack, value);
4233 return value;
4234}
4235
4236PyDoc_STRVAR(Unpickler_load_doc,
4237"load() -> object. Load a pickle."
4238"\n"
4239"Read a pickled object representation from the open file object given in\n"
4240"the constructor, and return the reconstituted object hierarchy specified\n"
4241"therein.\n");
4242
4243static PyObject *
4244Unpickler_load(UnpicklerObject *self)
4245{
4246 /* Check whether the Unpickler was initialized correctly. This prevents
4247 segfaulting if a subclass overridden __init__ with a function that does
4248 not call Unpickler.__init__(). Here, we simply ensure that self->read
4249 is not NULL. */
4250 if (self->read == NULL) {
4251 PyErr_Format(UnpicklingError,
4252 "Unpickler.__init__() was not called by %s.__init__()",
4253 Py_TYPE(self)->tp_name);
4254 return NULL;
4255 }
4256
4257 return load(self);
4258}
4259
4260/* The name of find_class() is misleading. In newer pickle protocols, this
4261 function is used for loading any global (i.e., functions), not just
4262 classes. The name is kept only for backward compatibility. */
4263
4264PyDoc_STRVAR(Unpickler_find_class_doc,
4265"find_class(module_name, global_name) -> object.\n"
4266"\n"
4267"Return an object from a specified module, importing the module if\n"
4268"necessary. Subclasses may override this method (e.g. to restrict\n"
4269"unpickling of arbitrary classes and functions).\n"
4270"\n"
4271"This method is called whenever a class or a function object is\n"
4272"needed. Both arguments passed are str objects.\n");
4273
4274static PyObject *
4275Unpickler_find_class(UnpicklerObject *self, PyObject *args)
4276{
4277 PyObject *global;
4278 PyObject *modules_dict;
4279 PyObject *module;
4280 PyObject *module_name, *global_name;
4281
4282 if (!PyArg_UnpackTuple(args, "find_class", 2, 2,
4283 &module_name, &global_name))
4284 return NULL;
4285
4286 modules_dict = PySys_GetObject("modules");
4287 if (modules_dict == NULL)
4288 return NULL;
4289
4290 module = PyDict_GetItem(modules_dict, module_name);
4291 if (module == NULL) {
4292 module = PyImport_Import(module_name);
4293 if (module == NULL)
4294 return NULL;
4295 global = PyObject_GetAttr(module, global_name);
4296 Py_DECREF(module);
4297 }
4298 else {
4299 global = PyObject_GetAttr(module, global_name);
4300 }
4301 return global;
4302}
4303
4304static struct PyMethodDef Unpickler_methods[] = {
4305 {"load", (PyCFunction)Unpickler_load, METH_NOARGS,
4306 Unpickler_load_doc},
4307 {"find_class", (PyCFunction)Unpickler_find_class, METH_VARARGS,
4308 Unpickler_find_class_doc},
4309 {NULL, NULL} /* sentinel */
4310};
4311
4312static void
4313Unpickler_dealloc(UnpicklerObject *self)
4314{
4315 PyObject_GC_UnTrack((PyObject *)self);
4316 Py_XDECREF(self->readline);
4317 Py_XDECREF(self->read);
4318 Py_XDECREF(self->memo);
4319 Py_XDECREF(self->stack);
4320 Py_XDECREF(self->pers_func);
4321 Py_XDECREF(self->arg);
4322 Py_XDECREF(self->last_string);
4323
4324 PyMem_Free(self->marks);
4325 free(self->encoding);
4326 free(self->errors);
4327
4328 Py_TYPE(self)->tp_free((PyObject *)self);
4329}
4330
4331static int
4332Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
4333{
4334 Py_VISIT(self->readline);
4335 Py_VISIT(self->read);
4336 Py_VISIT(self->memo);
4337 Py_VISIT(self->stack);
4338 Py_VISIT(self->pers_func);
4339 Py_VISIT(self->arg);
4340 Py_VISIT(self->last_string);
4341 return 0;
4342}
4343
4344static int
4345Unpickler_clear(UnpicklerObject *self)
4346{
4347 Py_CLEAR(self->readline);
4348 Py_CLEAR(self->read);
4349 Py_CLEAR(self->memo);
4350 Py_CLEAR(self->stack);
4351 Py_CLEAR(self->pers_func);
4352 Py_CLEAR(self->arg);
4353 Py_CLEAR(self->last_string);
4354
4355 PyMem_Free(self->marks);
4356 self->marks = NULL;
4357 free(self->encoding);
4358 self->encoding = NULL;
4359 free(self->errors);
4360 self->errors = NULL;
4361
4362 return 0;
4363}
4364
4365PyDoc_STRVAR(Unpickler_doc,
4366"Unpickler(file, *, encoding='ASCII', errors='strict')"
4367"\n"
4368"This takes a binary file for reading a pickle data stream.\n"
4369"\n"
4370"The protocol version of the pickle is detected automatically, so no\n"
4371"proto argument is needed.\n"
4372"\n"
4373"The file-like object must have two methods, a read() method\n"
4374"that takes an integer argument, and a readline() method that\n"
4375"requires no arguments. Both methods should return bytes.\n"
4376"Thus file-like object can be a binary file object opened for\n"
4377"reading, a BytesIO object, or any other custom object that\n"
4378"meets this interface.\n"
4379"\n"
4380"Optional keyword arguments are encoding and errors, which are\n"
4381"used to decode 8-bit string instances pickled by Python 2.x.\n"
4382"These default to 'ASCII' and 'strict', respectively.\n");
4383
4384static int
4385Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
4386{
4387 static char *kwlist[] = {"file", "encoding", "errors", 0};
4388 PyObject *file;
4389 char *encoding = NULL;
4390 char *errors = NULL;
4391
4392 /* XXX: That is an horrible error message. But, I don't know how to do
4393 better... */
4394 if (Py_SIZE(args) != 1) {
4395 PyErr_Format(PyExc_TypeError,
4396 "%s takes exactly one positional argument (%zd given)",
4397 Py_TYPE(self)->tp_name, Py_SIZE(args));
4398 return -1;
4399 }
4400
4401 /* Arguments parsing needs to be done in the __init__() method to allow
4402 subclasses to define their own __init__() method, which may (or may
4403 not) support Unpickler arguments. However, this means we need to be
4404 extra careful in the other Unpickler methods, since a subclass could
4405 forget to call Unpickler.__init__() thus breaking our internal
4406 invariants. */
4407 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|ss:Unpickler", kwlist,
4408 &file, &encoding, &errors))
4409 return -1;
4410
4411 /* In case of multiple __init__() calls, clear previous content. */
4412 if (self->read != NULL)
4413 (void)Unpickler_clear(self);
4414
4415 self->read = PyObject_GetAttrString(file, "read");
4416 self->readline = PyObject_GetAttrString(file, "readline");
4417 if (self->readline == NULL || self->read == NULL)
4418 return -1;
4419
4420 if (encoding == NULL)
4421 encoding = "ASCII";
4422 if (errors == NULL)
4423 errors = "strict";
4424
4425 self->encoding = strdup(encoding);
4426 self->errors = strdup(errors);
4427 if (self->encoding == NULL || self->errors == NULL) {
4428 PyErr_NoMemory();
4429 return -1;
4430 }
4431
4432 if (PyObject_HasAttrString((PyObject *)self, "persistent_load")) {
4433 self->pers_func = PyObject_GetAttrString((PyObject *)self,
4434 "persistent_load");
4435 if (self->pers_func == NULL)
4436 return -1;
4437 }
4438 else {
4439 self->pers_func = NULL;
4440 }
4441
4442 self->stack = (Pdata *)Pdata_New();
4443 if (self->stack == NULL)
4444 return -1;
4445
4446 self->memo = PyDict_New();
4447 if (self->memo == NULL)
4448 return -1;
4449
4450 return 0;
4451}
4452
4453static PyObject *
4454Unpickler_get_memo(UnpicklerObject *self)
4455{
4456 if (self->memo == NULL)
4457 PyErr_SetString(PyExc_AttributeError, "memo");
4458 else
4459 Py_INCREF(self->memo);
4460 return self->memo;
4461}
4462
4463static int
4464Unpickler_set_memo(UnpicklerObject *self, PyObject *value)
4465{
4466 PyObject *tmp;
4467
4468 if (value == NULL) {
4469 PyErr_SetString(PyExc_TypeError,
4470 "attribute deletion is not supported");
4471 return -1;
4472 }
4473 if (!PyDict_Check(value)) {
4474 PyErr_SetString(PyExc_TypeError, "memo must be a dictionary");
4475 return -1;
4476 }
4477
4478 tmp = self->memo;
4479 Py_INCREF(value);
4480 self->memo = value;
4481 Py_XDECREF(tmp);
4482
4483 return 0;
4484}
4485
4486static PyObject *
4487Unpickler_get_persload(UnpicklerObject *self)
4488{
4489 if (self->pers_func == NULL)
4490 PyErr_SetString(PyExc_AttributeError, "persistent_load");
4491 else
4492 Py_INCREF(self->pers_func);
4493 return self->pers_func;
4494}
4495
4496static int
4497Unpickler_set_persload(UnpicklerObject *self, PyObject *value)
4498{
4499 PyObject *tmp;
4500
4501 if (value == NULL) {
4502 PyErr_SetString(PyExc_TypeError,
4503 "attribute deletion is not supported");
4504 return -1;
4505 }
4506 if (!PyCallable_Check(value)) {
4507 PyErr_SetString(PyExc_TypeError,
4508 "persistent_load must be a callable taking "
4509 "one argument");
4510 return -1;
4511 }
4512
4513 tmp = self->pers_func;
4514 Py_INCREF(value);
4515 self->pers_func = value;
4516 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
4517
4518 return 0;
4519}
4520
4521static PyGetSetDef Unpickler_getsets[] = {
4522 {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
4523 {"persistent_load", (getter)Unpickler_get_persload,
4524 (setter)Unpickler_set_persload},
4525 {NULL}
4526};
4527
4528static PyTypeObject Unpickler_Type = {
4529 PyVarObject_HEAD_INIT(NULL, 0)
4530 "_pickle.Unpickler", /*tp_name*/
4531 sizeof(UnpicklerObject), /*tp_basicsize*/
4532 0, /*tp_itemsize*/
4533 (destructor)Unpickler_dealloc, /*tp_dealloc*/
4534 0, /*tp_print*/
4535 0, /*tp_getattr*/
4536 0, /*tp_setattr*/
4537 0, /*tp_compare*/
4538 0, /*tp_repr*/
4539 0, /*tp_as_number*/
4540 0, /*tp_as_sequence*/
4541 0, /*tp_as_mapping*/
4542 0, /*tp_hash*/
4543 0, /*tp_call*/
4544 0, /*tp_str*/
4545 0, /*tp_getattro*/
4546 0, /*tp_setattro*/
4547 0, /*tp_as_buffer*/
4548 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4549 Unpickler_doc, /*tp_doc*/
4550 (traverseproc)Unpickler_traverse, /*tp_traverse*/
4551 (inquiry)Unpickler_clear, /*tp_clear*/
4552 0, /*tp_richcompare*/
4553 0, /*tp_weaklistoffset*/
4554 0, /*tp_iter*/
4555 0, /*tp_iternext*/
4556 Unpickler_methods, /*tp_methods*/
4557 0, /*tp_members*/
4558 Unpickler_getsets, /*tp_getset*/
4559 0, /*tp_base*/
4560 0, /*tp_dict*/
4561 0, /*tp_descr_get*/
4562 0, /*tp_descr_set*/
4563 0, /*tp_dictoffset*/
4564 (initproc)Unpickler_init, /*tp_init*/
4565 PyType_GenericAlloc, /*tp_alloc*/
4566 PyType_GenericNew, /*tp_new*/
4567 PyObject_GC_Del, /*tp_free*/
4568 0, /*tp_is_gc*/
4569};
4570
4571static int
4572init_stuff(void)
4573{
4574 PyObject *copyreg;
4575
4576 copyreg = PyImport_ImportModule("copyreg");
4577 if (!copyreg)
4578 return -1;
4579
4580 dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
4581 if (!dispatch_table)
4582 goto error;
4583
4584 extension_registry = \
4585 PyObject_GetAttrString(copyreg, "_extension_registry");
4586 if (!extension_registry)
4587 goto error;
4588
4589 inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry");
4590 if (!inverted_registry)
4591 goto error;
4592
4593 extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
4594 if (!extension_cache)
4595 goto error;
4596
4597 Py_DECREF(copyreg);
4598
4599 empty_tuple = PyTuple_New(0);
4600 if (empty_tuple == NULL)
4601 return -1;
4602
4603 two_tuple = PyTuple_New(2);
4604 if (two_tuple == NULL)
4605 return -1;
4606 /* We use this temp container with no regard to refcounts, or to
4607 * keeping containees alive. Exempt from GC, because we don't
4608 * want anything looking at two_tuple() by magic.
4609 */
4610 PyObject_GC_UnTrack(two_tuple);
4611
4612 return 0;
4613
4614 error:
4615 Py_DECREF(copyreg);
4616 return -1;
4617}
4618
4619static struct PyModuleDef _picklemodule = {
4620 PyModuleDef_HEAD_INIT,
4621 "_pickle",
4622 pickle_module_doc,
4623 -1,
4624 NULL,
4625 NULL,
4626 NULL,
4627 NULL,
4628 NULL
4629};
4630
4631PyMODINIT_FUNC
4632PyInit__pickle(void)
4633{
4634 PyObject *m;
4635
4636 if (PyType_Ready(&Unpickler_Type) < 0)
4637 return NULL;
4638 if (PyType_Ready(&Pickler_Type) < 0)
4639 return NULL;
4640 if (PyType_Ready(&Pdata_Type) < 0)
4641 return NULL;
4642
4643 /* Create the module and add the functions. */
4644 m = PyModule_Create(&_picklemodule);
4645 if (m == NULL)
4646 return NULL;
4647
4648 if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
4649 return NULL;
4650 if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
4651 return NULL;
4652
4653 /* Initialize the exceptions. */
4654 PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
4655 if (PickleError == NULL)
4656 return NULL;
4657 PicklingError = \
4658 PyErr_NewException("_pickle.PicklingError", PickleError, NULL);
4659 if (PicklingError == NULL)
4660 return NULL;
4661 UnpicklingError = \
4662 PyErr_NewException("_pickle.UnpicklingError", PickleError, NULL);
4663 if (UnpicklingError == NULL)
4664 return NULL;
4665
4666 if (PyModule_AddObject(m, "PickleError", PickleError) < 0)
4667 return NULL;
4668 if (PyModule_AddObject(m, "PicklingError", PicklingError) < 0)
4669 return NULL;
4670 if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0)
4671 return NULL;
4672
4673 if (init_stuff() < 0)
4674 return NULL;
4675
4676 return m;
4677}