blob: 6536e6f16041c56cc912ef4264f38d6a65cc69d2 [file] [log] [blame]
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001#include "Python.h"
2#include "structmember.h"
3
4PyDoc_STRVAR(pickle_module_doc,
5"Optimized C implementation for the Python pickle module.");
6
/* Pickle protocol versions known to this module.  Bump these when new
   opcodes are added to the pickle protocol. */
enum {
    HIGHEST_PROTOCOL = 3,   /* newest protocol this module knows about */
    DEFAULT_PROTOCOL = 3    /* protocol selected by default */
};
12
13
/* Pickle opcodes.  Each opcode is a single byte in the pickle stream.  These
   must be kept updated with pickle.py.  Extensive docs are in
   pickletools.py. */
enum opcode {
    MARK            = '(',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',
    FLOAT           = 'F',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    LONG            = 'L',
    BININT2         = 'M',
    NONE            = 'N',
    PERSID          = 'P',
    BINPERSID       = 'Q',
    REDUCE          = 'R',
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',
    APPEND          = 'a',
    BUILD           = 'b',
    GLOBAL          = 'c',
    DICT            = 'd',
    EMPTY_DICT      = '}',
    APPENDS         = 'e',
    GET             = 'g',
    BINGET          = 'h',
    INST            = 'i',
    LONG_BINGET     = 'j',
    LIST            = 'l',
    EMPTY_LIST      = ']',
    OBJ             = 'o',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',
    SETITEM         = 's',
    TUPLE           = 't',
    EMPTY_TUPLE     = ')',
    SETITEMS        = 'u',
    BINFLOAT        = 'G',

    /* Protocol 2. */
    PROTO           = '\x80',
    NEWOBJ          = '\x81',
    EXT1            = '\x82',
    EXT2            = '\x83',
    EXT4            = '\x84',
    TUPLE1          = '\x85',
    TUPLE2          = '\x86',
    TUPLE3          = '\x87',
    NEWTRUE         = '\x88',
    NEWFALSE        = '\x89',
    LONG1           = '\x8a',
    LONG4           = '\x8b',

    /* Protocol 3 (Python 3.x) */
    BINBYTES        = 'B',
    SHORT_BINBYTES  = 'C',
};
77
/* These aren't opcodes.  They are the text used to pickle bools before
 * protocol 2: an INT opcode ('I') followed by "01" or "00".  Unpicklers
 * written before bools were introduced unpickle them as ints, while newer
 * unpicklers can recognize that bools were intended.  Protocol 2 added
 * direct ways to pickle bools.
 */
#undef TRUE
#undef FALSE
#define TRUE  "I01\n"
#define FALSE "I00\n"
87
/* Tuning constants. */
enum {
    /* Keep in sync with pickle.Pickler._BATCHSIZE.  This is how many
       elements batch_list/dict() pumps out before doing APPENDS/SETITEMS.
       Nothing will break if this gets out of sync with pickle.py, but it's
       unclear that would help anything either. */
    BATCHSIZE = 1000,

    /* Nesting depth at which a Pickler running in "fast mode" starts
       checking for self-referential data structures. */
    FAST_NESTING_LIMIT = 50,

    /* Size of the Pickler's write buffer.  Higher values reduce the number
       of calls to the write() method of the output stream. */
    WRITE_BUF_SIZE = 256,
};
103
104/* Exception classes for pickle. These should override the ones defined in
105 pickle.py, when the C-optimized Pickler and Unpickler are used. */
106static PyObject *PickleError;
107static PyObject *PicklingError;
108static PyObject *UnpicklingError;
109
110/* copyreg.dispatch_table, {type_object: pickling_function} */
111static PyObject *dispatch_table;
112/* For EXT[124] opcodes. */
113/* copyreg._extension_registry, {(module_name, function_name): code} */
114static PyObject *extension_registry;
115/* copyreg._inverted_registry, {code: (module_name, function_name)} */
116static PyObject *inverted_registry;
117/* copyreg._extension_cache, {code: object} */
118static PyObject *extension_cache;
119
120/* XXX: Are these really nescessary? */
121/* As the name says, an empty tuple. */
122static PyObject *empty_tuple;
123/* For looking up name pairs in copyreg._extension_registry. */
124static PyObject *two_tuple;
125
126static int
127stack_underflow(void)
128{
129 PyErr_SetString(UnpicklingError, "unpickling stack underflow");
130 return -1;
131}
132
133/* Internal data type used as the unpickling stack. */
134typedef struct {
135 PyObject_HEAD
136 int length; /* number of initial slots in data currently used */
137 int size; /* number of slots in data allocated */
138 PyObject **data;
139} Pdata;
140
141static void
142Pdata_dealloc(Pdata *self)
143{
144 int i;
145 PyObject **p;
146
147 for (i = self->length, p = self->data; --i >= 0; p++) {
148 Py_DECREF(*p);
149 }
150 if (self->data)
151 PyMem_Free(self->data);
152 PyObject_Del(self);
153}
154
155static PyTypeObject Pdata_Type = {
156 PyVarObject_HEAD_INIT(NULL, 0)
157 "_pickle.Pdata", /*tp_name*/
158 sizeof(Pdata), /*tp_basicsize*/
159 0, /*tp_itemsize*/
160 (destructor)Pdata_dealloc, /*tp_dealloc*/
161};
162
163static PyObject *
164Pdata_New(void)
165{
166 Pdata *self;
167
168 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
169 return NULL;
170 self->size = 8;
171 self->length = 0;
172 self->data = PyMem_Malloc(self->size * sizeof(PyObject *));
173 if (self->data)
174 return (PyObject *)self;
175 Py_DECREF(self);
176 return PyErr_NoMemory();
177}
178
179
180/* Retain only the initial clearto items. If clearto >= the current
181 * number of items, this is a (non-erroneous) NOP.
182 */
183static int
184Pdata_clear(Pdata *self, int clearto)
185{
186 int i;
187 PyObject **p;
188
189 if (clearto < 0)
190 return stack_underflow();
191 if (clearto >= self->length)
192 return 0;
193
194 for (i = self->length, p = self->data + clearto; --i >= clearto; p++) {
195 Py_CLEAR(*p);
196 }
197 self->length = clearto;
198
199 return 0;
200}
201
202static int
203Pdata_grow(Pdata *self)
204{
205 int bigger;
206 size_t nbytes;
207 PyObject **tmp;
208
209 bigger = (self->size << 1) + 1;
210 if (bigger <= 0) /* was 0, or new value overflows */
211 goto nomemory;
212 if ((int)(size_t)bigger != bigger)
213 goto nomemory;
214 nbytes = (size_t)bigger * sizeof(PyObject *);
215 if (nbytes / sizeof(PyObject *) != (size_t)bigger)
216 goto nomemory;
217 tmp = PyMem_Realloc(self->data, nbytes);
218 if (tmp == NULL)
219 goto nomemory;
220 self->data = tmp;
221 self->size = bigger;
222 return 0;
223
224 nomemory:
225 PyErr_NoMemory();
226 return -1;
227}
228
229/* D is a Pdata*. Pop the topmost element and store it into V, which
230 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
231 * is raised and V is set to NULL.
232 */
233static PyObject *
234Pdata_pop(Pdata *self)
235{
236 if (self->length == 0) {
237 PyErr_SetString(UnpicklingError, "bad pickle data");
238 return NULL;
239 }
240 return self->data[--(self->length)];
241}
242#define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
243
244static int
245Pdata_push(Pdata *self, PyObject *obj)
246{
247 if (self->length == self->size && Pdata_grow(self) < 0) {
248 return -1;
249 }
250 self->data[self->length++] = obj;
251 return 0;
252}
253
/* Push O on the stack D, transferring ownership of the caller's reference to
   the stack.  If the push fails, the enclosing function returns ER. */
#define PDATA_PUSH(D, O, ER)                    \
    do {                                        \
        if (Pdata_push((D), (O)) < 0)           \
            return (ER);                        \
    } while (0)
257
/* Push O on the stack D, adding a new reference to the object for the stack
   to own.  If the push fails, the extra reference is released again (the
   caller still holds its own) and the enclosing function returns ER.
   Note that O is evaluated more than once. */
#define PDATA_APPEND(D, O, ER)                  \
    do {                                        \
        Py_INCREF((O));                         \
        if (Pdata_push((D), (O)) < 0) {         \
            Py_DECREF((O));                     \
            return (ER);                        \
        }                                       \
    } while (0)
262
263static PyObject *
264Pdata_poptuple(Pdata *self, Py_ssize_t start)
265{
266 PyObject *tuple;
267 Py_ssize_t len, i, j;
268
269 len = self->length - start;
270 tuple = PyTuple_New(len);
271 if (tuple == NULL)
272 return NULL;
273 for (i = start, j = 0; j < len; i++, j++)
274 PyTuple_SET_ITEM(tuple, j, self->data[i]);
275
276 self->length = start;
277 return tuple;
278}
279
280static PyObject *
281Pdata_poplist(Pdata *self, Py_ssize_t start)
282{
283 PyObject *list;
284 Py_ssize_t len, i, j;
285
286 len = self->length - start;
287 list = PyList_New(len);
288 if (list == NULL)
289 return NULL;
290 for (i = start, j = 0; j < len; i++, j++)
291 PyList_SET_ITEM(list, j, self->data[i]);
292
293 self->length = start;
294 return list;
295}
296
297typedef struct PicklerObject {
298 PyObject_HEAD
299 PyObject *write; /* write() method of the output stream */
300 PyObject *memo; /* Memo dictionary, keep track of the seen
301 objects to support self-referential objects
302 pickling. */
303 PyObject *pers_func; /* persistent_id() method, can be NULL */
304 PyObject *arg;
305 int proto; /* Pickle protocol number, >= 0 */
306 int bin; /* Boolean, true if proto > 0 */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000307 int buf_size; /* Size of the current buffered pickle data */
308 char *write_buf; /* Write buffer, this is to avoid calling the
309 write() method of the output stream too
310 often. */
311 int fast; /* Enable fast mode if set to a true value.
312 The fast mode disable the usage of memo,
313 therefore speeding the pickling process by
314 not generating superfluous PUT opcodes. It
315 should not be used if with self-referential
316 objects. */
317 int fast_nesting;
318 PyObject *fast_memo;
319} PicklerObject;
320
321typedef struct UnpicklerObject {
322 PyObject_HEAD
323 Pdata *stack; /* Pickle data stack, store unpickled objects. */
324 PyObject *readline; /* readline() method of the output stream */
325 PyObject *read; /* read() method of the output stream */
326 PyObject *memo; /* Memo dictionary, provide the objects stored
327 using the PUT opcodes. */
328 PyObject *arg;
329 PyObject *pers_func; /* persistent_load() method, can be NULL. */
330 PyObject *last_string; /* Reference to the last string read by the
331 readline() method. */
332 char *buffer; /* Reading buffer. */
333 char *encoding; /* Name of the encoding to be used for
334 decoding strings pickled using Python
335 2.x. The default value is "ASCII" */
336 char *errors; /* Name of errors handling scheme to used when
337 decoding strings. The default value is
338 "strict". */
339 int *marks; /* Mark stack, used for unpickling container
340 objects. */
341 Py_ssize_t num_marks; /* Number of marks in the mark stack. */
342 Py_ssize_t marks_size; /* Current allocated size of the mark stack. */
343} UnpicklerObject;
344
345/* Forward declarations */
346static int save(PicklerObject *, PyObject *, int);
347static int save_reduce(PicklerObject *, PyObject *, PyObject *);
348static PyTypeObject Pickler_Type;
349static PyTypeObject Unpickler_Type;
350
351
/* Helpers for creating the argument tuple passed to functions.  The tuple is
   cached in self->arg, which has the performance advantage of calling
   PyTuple_New() only once. */

/* Store obj as the sole item of self->arg, creating the tuple on first use
   and releasing whatever item it held before.  The reference to obj is
   consumed: it is either stored in the tuple or, if the tuple cannot be
   created, released.  self->arg is left NULL in the latter case. */
#define ARG_TUP(self, obj)                                      \
    do {                                                        \
        if ((self)->arg == NULL)                                \
            (self)->arg = PyTuple_New(1);                       \
        if ((self)->arg != NULL) {                              \
            Py_XDECREF(PyTuple_GET_ITEM((self)->arg, 0));       \
            PyTuple_SET_ITEM((self)->arg, 0, (obj));            \
        }                                                       \
        else {                                                  \
            Py_DECREF((obj));                                   \
        }                                                       \
    } while (0)

/* Give up the cached tuple if something else holds a reference to it, so
   that the next ARG_TUP() builds a fresh one. */
#define FREE_ARG_TUP(self)                                      \
    do {                                                        \
        if ((self)->arg->ob_refcnt > 1)                         \
            Py_CLEAR((self)->arg);                              \
    } while (0)
369
/* A temporary cleaner API for fast single argument function call.

   XXX: Does caching the argument tuple provide any real performance benefits?

   A quick benchmark, on a 2.0GHz Athlon64 3200+ running Linux 2.6.24 with
   glibc 2.7, tells me that it takes roughly 20,000,000 PyTuple_New(1) calls
   when the tuple is retrieved from the freelist (i.e., call PyTuple_New() then
   immediately DECREF it) and 1,200,000 calls when allocating brand new tuples
   (i.e., call PyTuple_New() and store the returned value in an array), to save
   one second (wall clock time). Either way, the loading time of a pickle
   stream large enough to generate this number of calls would be massively
   overwhelmed by other factors, like I/O throughput, the GC traversal and
   object allocation overhead. So, I really doubt these functions provide any
   real benefits.

   On the other hand, oprofile reports that pickle spends a lot of time in
   these functions. But, that is probably more related to the function call
   overhead than to the argument tuple allocation.

   XXX: And, what is the reference behavior of these? Steal, borrow? At first
   glance, they seem to steal the reference of 'arg' and borrow the reference
   of 'func'.
 */
393static PyObject *
394pickler_call(PicklerObject *self, PyObject *func, PyObject *arg)
395{
396 PyObject *result = NULL;
397
398 ARG_TUP(self, arg);
399 if (self->arg) {
400 result = PyObject_Call(func, self->arg, NULL);
401 FREE_ARG_TUP(self);
402 }
403 return result;
404}
405
406static PyObject *
407unpickler_call(UnpicklerObject *self, PyObject *func, PyObject *arg)
408{
409 PyObject *result = NULL;
410
411 ARG_TUP(self, arg);
412 if (self->arg) {
413 result = PyObject_Call(func, self->arg, NULL);
414 FREE_ARG_TUP(self);
415 }
416 return result;
417}
418
419static Py_ssize_t
420pickler_write(PicklerObject *self, const char *s, Py_ssize_t n)
421{
422 PyObject *data, *result;
423
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000424 if (self->write_buf == NULL) {
425 PyErr_SetString(PyExc_SystemError, "invalid write buffer");
426 return -1;
427 }
428
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000429 if (s == NULL) {
430 if (!(self->buf_size))
431 return 0;
432 data = PyBytes_FromStringAndSize(self->write_buf, self->buf_size);
433 if (data == NULL)
434 return -1;
435 }
436 else {
437 if (self->buf_size && (n + self->buf_size) > WRITE_BUF_SIZE) {
438 if (pickler_write(self, NULL, 0) < 0)
439 return -1;
440 }
441
442 if (n > WRITE_BUF_SIZE) {
443 if (!(data = PyBytes_FromStringAndSize(s, n)))
444 return -1;
445 }
446 else {
447 memcpy(self->write_buf + self->buf_size, s, n);
448 self->buf_size += n;
449 return n;
450 }
451 }
452
453 /* object with write method */
454 result = pickler_call(self, self->write, data);
455 if (result == NULL)
456 return -1;
457
458 Py_DECREF(result);
459 self->buf_size = 0;
460 return n;
461}
462
/* XXX: These read/readline functions ought to be optimized. Buffered I/O
   might help a lot, especially with the new (but much slower) io library.
   On the other hand, the added complexity might not be worth it.
 */
467
468/* Read at least n characters from the input stream and set s to the current
469 reading position. */
470static Py_ssize_t
471unpickler_read(UnpicklerObject *self, char **s, Py_ssize_t n)
472{
473 PyObject *len;
474 PyObject *data;
475
476 len = PyLong_FromSsize_t(n);
477 if (len == NULL)
478 return -1;
479
480 data = unpickler_call(self, self->read, len);
481 if (data == NULL)
482 return -1;
483
484 /* XXX: Should bytearray be supported too? */
485 if (!PyBytes_Check(data)) {
486 PyErr_SetString(PyExc_ValueError,
487 "read() from the underlying stream did not"
488 "return bytes");
Amaury Forgeot d'Arc5f952572008-11-25 21:11:54 +0000489 Py_DECREF(data);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000490 return -1;
491 }
492
Amaury Forgeot d'Arc3e4e72f2008-11-11 20:05:06 +0000493 if (PyBytes_GET_SIZE(data) != n) {
494 PyErr_SetNone(PyExc_EOFError);
Amaury Forgeot d'Arc5f952572008-11-25 21:11:54 +0000495 Py_DECREF(data);
Amaury Forgeot d'Arc3e4e72f2008-11-11 20:05:06 +0000496 return -1;
497 }
498
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000499 Py_XDECREF(self->last_string);
500 self->last_string = data;
501
502 if (!(*s = PyBytes_AS_STRING(data)))
503 return -1;
504
505 return n;
506}
507
508static Py_ssize_t
509unpickler_readline(UnpicklerObject *self, char **s)
510{
511 PyObject *data;
512
513 data = PyObject_CallObject(self->readline, empty_tuple);
514 if (data == NULL)
515 return -1;
516
517 /* XXX: Should bytearray be supported too? */
518 if (!PyBytes_Check(data)) {
519 PyErr_SetString(PyExc_ValueError,
520 "readline() from the underlying stream did not"
521 "return bytes");
522 return -1;
523 }
524
525 Py_XDECREF(self->last_string);
526 self->last_string = data;
527
528 if (!(*s = PyBytes_AS_STRING(data)))
529 return -1;
530
531 return PyBytes_GET_SIZE(data);
532}
533
534/* Generate a GET opcode for an object stored in the memo. The 'key' argument
535 should be the address of the object as returned by PyLong_FromVoidPtr(). */
536static int
537memo_get(PicklerObject *self, PyObject *key)
538{
539 PyObject *value;
540 PyObject *memo_id;
541 long x;
542 char pdata[30];
543 int len;
544
545 value = PyDict_GetItemWithError(self->memo, key);
546 if (value == NULL) {
547 if (!PyErr_Occurred())
548 PyErr_SetObject(PyExc_KeyError, key);
549 return -1;
550 }
551
552 memo_id = PyTuple_GetItem(value, 0);
553 if (memo_id == NULL)
554 return -1;
555
556 if (!PyLong_Check(memo_id)) {
557 PyErr_SetString(PicklingError, "memo id must be an integer");
558 return -1;
559 }
560 x = PyLong_AsLong(memo_id);
561 if (x == -1 && PyErr_Occurred())
562 return -1;
563
564 if (!self->bin) {
565 pdata[0] = GET;
566 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
567 len = (int)strlen(pdata);
568 }
569 else {
570 if (x < 256) {
571 pdata[0] = BINGET;
572 pdata[1] = (unsigned char)(x & 0xff);
573 len = 2;
574 }
575 else if (x <= 0xffffffffL) {
576 pdata[0] = LONG_BINGET;
577 pdata[1] = (unsigned char)(x & 0xff);
578 pdata[2] = (unsigned char)((x >> 8) & 0xff);
579 pdata[3] = (unsigned char)((x >> 16) & 0xff);
580 pdata[4] = (unsigned char)((x >> 24) & 0xff);
581 len = 5;
582 }
583 else { /* unlikely */
584 PyErr_SetString(PicklingError,
585 "memo id too large for LONG_BINGET");
586 return -1;
587 }
588 }
589
590 if (pickler_write(self, pdata, len) < 0)
591 return -1;
592
593 return 0;
594}
595
596/* Store an object in the memo, assign it a new unique ID based on the number
597 of objects currently stored in the memo and generate a PUT opcode. */
598static int
599memo_put(PicklerObject *self, PyObject *obj)
600{
601 PyObject *key = NULL;
602 PyObject *memo_id = NULL;
603 PyObject *tuple = NULL;
604 long x;
605 char pdata[30];
606 int len;
607 int status = 0;
608
609 if (self->fast)
610 return 0;
611
612 key = PyLong_FromVoidPtr(obj);
613 if (key == NULL)
614 goto error;
615 if ((x = PyDict_Size(self->memo)) < 0)
616 goto error;
617 memo_id = PyLong_FromLong(x);
618 if (memo_id == NULL)
619 goto error;
620 tuple = PyTuple_New(2);
621 if (tuple == NULL)
622 goto error;
623
624 Py_INCREF(memo_id);
625 PyTuple_SET_ITEM(tuple, 0, memo_id);
626 Py_INCREF(obj);
627 PyTuple_SET_ITEM(tuple, 1, obj);
628 if (PyDict_SetItem(self->memo, key, tuple) < 0)
629 goto error;
630
631 if (!self->bin) {
632 pdata[0] = PUT;
633 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
634 len = strlen(pdata);
635 }
636 else {
637 if (x < 256) {
638 pdata[0] = BINPUT;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +0000639 pdata[1] = (unsigned char)x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000640 len = 2;
641 }
642 else if (x <= 0xffffffffL) {
643 pdata[0] = LONG_BINPUT;
644 pdata[1] = (unsigned char)(x & 0xff);
645 pdata[2] = (unsigned char)((x >> 8) & 0xff);
646 pdata[3] = (unsigned char)((x >> 16) & 0xff);
647 pdata[4] = (unsigned char)((x >> 24) & 0xff);
648 len = 5;
649 }
650 else { /* unlikely */
651 PyErr_SetString(PicklingError,
652 "memo id too large for LONG_BINPUT");
653 return -1;
654 }
655 }
656
657 if (pickler_write(self, pdata, len) < 0)
658 goto error;
659
660 if (0) {
661 error:
662 status = -1;
663 }
664
665 Py_XDECREF(key);
666 Py_XDECREF(memo_id);
667 Py_XDECREF(tuple);
668
669 return status;
670}
671
672static PyObject *
673whichmodule(PyObject *global, PyObject *global_name)
674{
675 Py_ssize_t i, j;
676 static PyObject *module_str = NULL;
677 static PyObject *main_str = NULL;
678 PyObject *module_name;
679 PyObject *modules_dict;
680 PyObject *module;
681 PyObject *obj;
682
683 if (module_str == NULL) {
684 module_str = PyUnicode_InternFromString("__module__");
685 if (module_str == NULL)
686 return NULL;
687 main_str = PyUnicode_InternFromString("__main__");
688 if (main_str == NULL)
689 return NULL;
690 }
691
692 module_name = PyObject_GetAttr(global, module_str);
693
694 /* In some rare cases (e.g., random.getrandbits), __module__ can be
695 None. If it is so, then search sys.modules for the module of
696 global. */
697 if (module_name == Py_None) {
698 Py_DECREF(module_name);
699 goto search;
700 }
701
702 if (module_name) {
703 return module_name;
704 }
705 if (PyErr_ExceptionMatches(PyExc_AttributeError))
706 PyErr_Clear();
707 else
708 return NULL;
709
710 search:
711 modules_dict = PySys_GetObject("modules");
712 if (modules_dict == NULL)
713 return NULL;
714
715 i = 0;
716 module_name = NULL;
717 while ((j = PyDict_Next(modules_dict, &i, &module_name, &module))) {
718 if (PyObject_Compare(module_name, main_str) == 0)
719 continue;
720
721 obj = PyObject_GetAttr(module, global_name);
722 if (obj == NULL) {
723 if (PyErr_ExceptionMatches(PyExc_AttributeError))
724 PyErr_Clear();
725 else
726 return NULL;
727 continue;
728 }
729
730 if (obj != global) {
731 Py_DECREF(obj);
732 continue;
733 }
734
735 Py_DECREF(obj);
736 break;
737 }
738
739 /* If no module is found, use __main__. */
740 if (!j) {
741 module_name = main_str;
742 }
743
744 Py_INCREF(module_name);
745 return module_name;
746}
747
748/* fast_save_enter() and fast_save_leave() are guards against recursive
749 objects when Pickler is used with the "fast mode" (i.e., with object
750 memoization disabled). If the nesting of a list or dict object exceed
751 FAST_NESTING_LIMIT, these guards will start keeping an internal
752 reference to the seen list or dict objects and check whether these objects
753 are recursive. These are not strictly necessary, since save() has a
754 hard-coded recursion limit, but they give a nicer error message than the
755 typical RuntimeError. */
756static int
757fast_save_enter(PicklerObject *self, PyObject *obj)
758{
759 /* if fast_nesting < 0, we're doing an error exit. */
760 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
761 PyObject *key = NULL;
762 if (self->fast_memo == NULL) {
763 self->fast_memo = PyDict_New();
764 if (self->fast_memo == NULL) {
765 self->fast_nesting = -1;
766 return 0;
767 }
768 }
769 key = PyLong_FromVoidPtr(obj);
770 if (key == NULL)
771 return 0;
772 if (PyDict_GetItem(self->fast_memo, key)) {
773 Py_DECREF(key);
774 PyErr_Format(PyExc_ValueError,
775 "fast mode: can't pickle cyclic objects "
776 "including object type %.200s at %p",
777 obj->ob_type->tp_name, obj);
778 self->fast_nesting = -1;
779 return 0;
780 }
781 if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
782 Py_DECREF(key);
783 self->fast_nesting = -1;
784 return 0;
785 }
786 Py_DECREF(key);
787 }
788 return 1;
789}
790
791static int
792fast_save_leave(PicklerObject *self, PyObject *obj)
793{
794 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
795 PyObject *key = PyLong_FromVoidPtr(obj);
796 if (key == NULL)
797 return 0;
798 if (PyDict_DelItem(self->fast_memo, key) < 0) {
799 Py_DECREF(key);
800 return 0;
801 }
802 Py_DECREF(key);
803 }
804 return 1;
805}
806
807static int
808save_none(PicklerObject *self, PyObject *obj)
809{
810 const char none_op = NONE;
811 if (pickler_write(self, &none_op, 1) < 0)
812 return -1;
813
814 return 0;
815}
816
817static int
818save_bool(PicklerObject *self, PyObject *obj)
819{
820 static const char *buf[2] = { FALSE, TRUE };
821 const char len[2] = {sizeof(FALSE) - 1, sizeof(TRUE) - 1};
822 int p = (obj == Py_True);
823
824 if (self->proto >= 2) {
825 const char bool_op = p ? NEWTRUE : NEWFALSE;
826 if (pickler_write(self, &bool_op, 1) < 0)
827 return -1;
828 }
829 else if (pickler_write(self, buf[p], len[p]) < 0)
830 return -1;
831
832 return 0;
833}
834
835static int
836save_int(PicklerObject *self, long x)
837{
838 char pdata[32];
839 int len = 0;
840
841 if (!self->bin
842#if SIZEOF_LONG > 4
843 || x > 0x7fffffffL || x < -0x80000000L
844#endif
845 ) {
846 /* Text-mode pickle, or long too big to fit in the 4-byte
847 * signed BININT format: store as a string.
848 */
Mark Dickinson8dd05142009-01-20 20:43:58 +0000849 pdata[0] = LONG; /* use LONG for consistency with pickle.py */
850 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ldL\n", x);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000851 if (pickler_write(self, pdata, strlen(pdata)) < 0)
852 return -1;
853 }
854 else {
855 /* Binary pickle and x fits in a signed 4-byte int. */
856 pdata[1] = (unsigned char)(x & 0xff);
857 pdata[2] = (unsigned char)((x >> 8) & 0xff);
858 pdata[3] = (unsigned char)((x >> 16) & 0xff);
859 pdata[4] = (unsigned char)((x >> 24) & 0xff);
860
861 if ((pdata[4] == 0) && (pdata[3] == 0)) {
862 if (pdata[2] == 0) {
863 pdata[0] = BININT1;
864 len = 2;
865 }
866 else {
867 pdata[0] = BININT2;
868 len = 3;
869 }
870 }
871 else {
872 pdata[0] = BININT;
873 len = 5;
874 }
875
876 if (pickler_write(self, pdata, len) < 0)
877 return -1;
878 }
879
880 return 0;
881}
882
883static int
884save_long(PicklerObject *self, PyObject *obj)
885{
886 PyObject *repr = NULL;
887 Py_ssize_t size;
888 long val = PyLong_AsLong(obj);
889 int status = 0;
890
891 const char long_op = LONG;
892
893 if (val == -1 && PyErr_Occurred()) {
894 /* out of range for int pickling */
895 PyErr_Clear();
896 }
897 else
898 return save_int(self, val);
899
900 if (self->proto >= 2) {
901 /* Linear-time pickling. */
902 size_t nbits;
903 size_t nbytes;
904 unsigned char *pdata;
905 char header[5];
906 int i;
907 int sign = _PyLong_Sign(obj);
908
909 if (sign == 0) {
910 header[0] = LONG1;
911 header[1] = 0; /* It's 0 -- an empty bytestring. */
912 if (pickler_write(self, header, 2) < 0)
913 goto error;
914 return 0;
915 }
916 nbits = _PyLong_NumBits(obj);
917 if (nbits == (size_t)-1 && PyErr_Occurred())
918 goto error;
919 /* How many bytes do we need? There are nbits >> 3 full
920 * bytes of data, and nbits & 7 leftover bits. If there
921 * are any leftover bits, then we clearly need another
922 * byte. Wnat's not so obvious is that we *probably*
923 * need another byte even if there aren't any leftovers:
924 * the most-significant bit of the most-significant byte
925 * acts like a sign bit, and it's usually got a sense
926 * opposite of the one we need. The exception is longs
927 * of the form -(2**(8*j-1)) for j > 0. Such a long is
928 * its own 256's-complement, so has the right sign bit
929 * even without the extra byte. That's a pain to check
930 * for in advance, though, so we always grab an extra
931 * byte at the start, and cut it back later if possible.
932 */
933 nbytes = (nbits >> 3) + 1;
934 if (nbytes > INT_MAX) {
935 PyErr_SetString(PyExc_OverflowError,
936 "long too large to pickle");
937 goto error;
938 }
Neal Norwitz6ae2eb22008-08-24 23:50:08 +0000939 repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000940 if (repr == NULL)
941 goto error;
Neal Norwitz6ae2eb22008-08-24 23:50:08 +0000942 pdata = (unsigned char *)PyBytes_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000943 i = _PyLong_AsByteArray((PyLongObject *)obj,
944 pdata, nbytes,
945 1 /* little endian */ , 1 /* signed */ );
946 if (i < 0)
947 goto error;
948 /* If the long is negative, this may be a byte more than
949 * needed. This is so iff the MSB is all redundant sign
950 * bits.
951 */
952 if (sign < 0 &&
953 nbytes > 1 &&
954 pdata[nbytes - 1] == 0xff &&
955 (pdata[nbytes - 2] & 0x80) != 0) {
956 nbytes--;
957 }
958
959 if (nbytes < 256) {
960 header[0] = LONG1;
961 header[1] = (unsigned char)nbytes;
962 size = 2;
963 }
964 else {
965 header[0] = LONG4;
966 size = (int)nbytes;
967 for (i = 1; i < 5; i++) {
968 header[i] = (unsigned char)(size & 0xff);
969 size >>= 8;
970 }
971 size = 5;
972 }
973 if (pickler_write(self, header, size) < 0 ||
974 pickler_write(self, (char *)pdata, (int)nbytes) < 0)
975 goto error;
976 }
977 else {
978 char *string;
979
Mark Dickinson8dd05142009-01-20 20:43:58 +0000980 /* proto < 2: write the repr and newline. This is quadratic-time (in
981 the number of digits), in both directions. We add a trailing 'L'
982 to the repr, for compatibility with Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000983
984 repr = PyObject_Repr(obj);
985 if (repr == NULL)
986 goto error;
987
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +0000988 string = _PyUnicode_AsStringAndSize(repr, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000989 if (string == NULL)
990 goto error;
991
992 if (pickler_write(self, &long_op, 1) < 0 ||
993 pickler_write(self, string, size) < 0 ||
Mark Dickinson8dd05142009-01-20 20:43:58 +0000994 pickler_write(self, "L\n", 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000995 goto error;
996 }
997
998 if (0) {
999 error:
1000 status = -1;
1001 }
1002 Py_XDECREF(repr);
1003
1004 return status;
1005}
1006
1007static int
1008save_float(PicklerObject *self, PyObject *obj)
1009{
1010 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
1011
1012 if (self->bin) {
1013 char pdata[9];
1014 pdata[0] = BINFLOAT;
1015 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
1016 return -1;
1017 if (pickler_write(self, pdata, 9) < 0)
1018 return -1;
1019 }
1020 else {
1021 char pdata[250];
1022 pdata[0] = FLOAT;
1023 PyOS_ascii_formatd(pdata + 1, sizeof(pdata) - 2, "%.17g", x);
1024 /* Extend the formatted string with a newline character */
1025 strcat(pdata, "\n");
1026
1027 if (pickler_write(self, pdata, strlen(pdata)) < 0)
1028 return -1;
1029 }
1030
1031 return 0;
1032}
1033
1034static int
1035save_bytes(PicklerObject *self, PyObject *obj)
1036{
1037 if (self->proto < 3) {
1038 /* Older pickle protocols do not have an opcode for pickling bytes
1039 objects. Therefore, we need to fake the copy protocol (i.e.,
1040 the __reduce__ method) to permit bytes object unpickling. */
1041 PyObject *reduce_value = NULL;
1042 PyObject *bytelist = NULL;
1043 int status;
1044
1045 bytelist = PySequence_List(obj);
1046 if (bytelist == NULL)
1047 return -1;
1048
1049 reduce_value = Py_BuildValue("(O(O))", (PyObject *)&PyBytes_Type,
1050 bytelist);
1051 if (reduce_value == NULL) {
1052 Py_DECREF(bytelist);
1053 return -1;
1054 }
1055
1056 /* save_reduce() will memoize the object automatically. */
1057 status = save_reduce(self, reduce_value, obj);
1058 Py_DECREF(reduce_value);
1059 Py_DECREF(bytelist);
1060 return status;
1061 }
1062 else {
1063 Py_ssize_t size;
1064 char header[5];
1065 int len;
1066
1067 size = PyBytes_Size(obj);
1068 if (size < 0)
1069 return -1;
1070
1071 if (size < 256) {
1072 header[0] = SHORT_BINBYTES;
1073 header[1] = (unsigned char)size;
1074 len = 2;
1075 }
1076 else if (size <= 0xffffffffL) {
1077 header[0] = BINBYTES;
1078 header[1] = (unsigned char)(size & 0xff);
1079 header[2] = (unsigned char)((size >> 8) & 0xff);
1080 header[3] = (unsigned char)((size >> 16) & 0xff);
1081 header[4] = (unsigned char)((size >> 24) & 0xff);
1082 len = 5;
1083 }
1084 else {
1085 return -1; /* string too large */
1086 }
1087
1088 if (pickler_write(self, header, len) < 0)
1089 return -1;
1090
1091 if (pickler_write(self, PyBytes_AS_STRING(obj), size) < 0)
1092 return -1;
1093
1094 if (memo_put(self, obj) < 0)
1095 return -1;
1096
1097 return 0;
1098 }
1099}
1100
1101/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
1102 backslash and newline characters to \uXXXX escapes. */
1103static PyObject *
1104raw_unicode_escape(const Py_UNICODE *s, Py_ssize_t size)
1105{
1106 PyObject *repr, *result;
1107 char *p;
1108 char *q;
1109
1110 static const char *hexdigits = "0123456789abcdef";
1111
1112#ifdef Py_UNICODE_WIDE
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001113 const Py_ssize_t expandsize = 10;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001114#else
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001115 const Py_ssize_t expandsize = 6;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001116#endif
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001117
1118 if (size > PY_SSIZE_T_MAX / expandsize)
1119 return PyErr_NoMemory();
1120
1121 repr = PyByteArray_FromStringAndSize(NULL, expandsize * size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001122 if (repr == NULL)
1123 return NULL;
1124 if (size == 0)
1125 goto done;
1126
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001127 p = q = PyByteArray_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001128 while (size-- > 0) {
1129 Py_UNICODE ch = *s++;
1130#ifdef Py_UNICODE_WIDE
1131 /* Map 32-bit characters to '\Uxxxxxxxx' */
1132 if (ch >= 0x10000) {
1133 *p++ = '\\';
1134 *p++ = 'U';
1135 *p++ = hexdigits[(ch >> 28) & 0xf];
1136 *p++ = hexdigits[(ch >> 24) & 0xf];
1137 *p++ = hexdigits[(ch >> 20) & 0xf];
1138 *p++ = hexdigits[(ch >> 16) & 0xf];
1139 *p++ = hexdigits[(ch >> 12) & 0xf];
1140 *p++ = hexdigits[(ch >> 8) & 0xf];
1141 *p++ = hexdigits[(ch >> 4) & 0xf];
1142 *p++ = hexdigits[ch & 15];
1143 }
1144 else
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001145#else
1146 /* Map UTF-16 surrogate pairs to '\U00xxxxxx' */
1147 if (ch >= 0xD800 && ch < 0xDC00) {
1148 Py_UNICODE ch2;
1149 Py_UCS4 ucs;
1150
1151 ch2 = *s++;
1152 size--;
1153 if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
1154 ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
1155 *p++ = '\\';
1156 *p++ = 'U';
1157 *p++ = hexdigits[(ucs >> 28) & 0xf];
1158 *p++ = hexdigits[(ucs >> 24) & 0xf];
1159 *p++ = hexdigits[(ucs >> 20) & 0xf];
1160 *p++ = hexdigits[(ucs >> 16) & 0xf];
1161 *p++ = hexdigits[(ucs >> 12) & 0xf];
1162 *p++ = hexdigits[(ucs >> 8) & 0xf];
1163 *p++ = hexdigits[(ucs >> 4) & 0xf];
1164 *p++ = hexdigits[ucs & 0xf];
1165 continue;
1166 }
1167 /* Fall through: isolated surrogates are copied as-is */
1168 s--;
1169 size++;
1170 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001171#endif
1172 /* Map 16-bit characters to '\uxxxx' */
1173 if (ch >= 256 || ch == '\\' || ch == '\n') {
1174 *p++ = '\\';
1175 *p++ = 'u';
1176 *p++ = hexdigits[(ch >> 12) & 0xf];
1177 *p++ = hexdigits[(ch >> 8) & 0xf];
1178 *p++ = hexdigits[(ch >> 4) & 0xf];
1179 *p++ = hexdigits[ch & 15];
1180 }
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001181 /* Copy everything else as-is */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001182 else
1183 *p++ = (char) ch;
1184 }
1185 size = p - q;
1186
1187 done:
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001188 result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(repr), size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001189 Py_DECREF(repr);
1190 return result;
1191}
1192
1193static int
1194save_unicode(PicklerObject *self, PyObject *obj)
1195{
1196 Py_ssize_t size;
1197 PyObject *encoded = NULL;
1198
1199 if (self->bin) {
1200 char pdata[5];
1201
1202 encoded = PyUnicode_AsUTF8String(obj);
1203 if (encoded == NULL)
1204 goto error;
1205
1206 size = PyBytes_GET_SIZE(encoded);
1207 if (size < 0 || size > 0xffffffffL)
1208 goto error; /* string too large */
1209
1210 pdata[0] = BINUNICODE;
1211 pdata[1] = (unsigned char)(size & 0xff);
1212 pdata[2] = (unsigned char)((size >> 8) & 0xff);
1213 pdata[3] = (unsigned char)((size >> 16) & 0xff);
1214 pdata[4] = (unsigned char)((size >> 24) & 0xff);
1215
1216 if (pickler_write(self, pdata, 5) < 0)
1217 goto error;
1218
1219 if (pickler_write(self, PyBytes_AS_STRING(encoded), size) < 0)
1220 goto error;
1221 }
1222 else {
1223 const char unicode_op = UNICODE;
1224
1225 encoded = raw_unicode_escape(PyUnicode_AS_UNICODE(obj),
1226 PyUnicode_GET_SIZE(obj));
1227 if (encoded == NULL)
1228 goto error;
1229
1230 if (pickler_write(self, &unicode_op, 1) < 0)
1231 goto error;
1232
1233 size = PyBytes_GET_SIZE(encoded);
1234 if (pickler_write(self, PyBytes_AS_STRING(encoded), size) < 0)
1235 goto error;
1236
1237 if (pickler_write(self, "\n", 1) < 0)
1238 goto error;
1239 }
1240 if (memo_put(self, obj) < 0)
1241 goto error;
1242
1243 Py_DECREF(encoded);
1244 return 0;
1245
1246 error:
1247 Py_XDECREF(encoded);
1248 return -1;
1249}
1250
1251/* A helper for save_tuple. Push the len elements in tuple t on the stack. */
1252static int
1253store_tuple_elements(PicklerObject *self, PyObject *t, int len)
1254{
1255 int i;
1256
1257 assert(PyTuple_Size(t) == len);
1258
1259 for (i = 0; i < len; i++) {
1260 PyObject *element = PyTuple_GET_ITEM(t, i);
1261
1262 if (element == NULL)
1263 return -1;
1264 if (save(self, element, 0) < 0)
1265 return -1;
1266 }
1267
1268 return 0;
1269}
1270
1271/* Tuples are ubiquitous in the pickle protocols, so many techniques are
1272 * used across protocols to minimize the space needed to pickle them.
1273 * Tuples are also the only builtin immutable type that can be recursive
1274 * (a tuple can be reached from itself), and that requires some subtle
1275 * magic so that it works in all cases. IOW, this is a long routine.
1276 */
1277static int
1278save_tuple(PicklerObject *self, PyObject *obj)
1279{
1280 PyObject *memo_key = NULL;
1281 int len, i;
1282 int status = 0;
1283
1284 const char mark_op = MARK;
1285 const char tuple_op = TUPLE;
1286 const char pop_op = POP;
1287 const char pop_mark_op = POP_MARK;
1288 const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
1289
1290 if ((len = PyTuple_Size(obj)) < 0)
1291 return -1;
1292
1293 if (len == 0) {
1294 char pdata[2];
1295
1296 if (self->proto) {
1297 pdata[0] = EMPTY_TUPLE;
1298 len = 1;
1299 }
1300 else {
1301 pdata[0] = MARK;
1302 pdata[1] = TUPLE;
1303 len = 2;
1304 }
1305 if (pickler_write(self, pdata, len) < 0)
1306 return -1;
1307 return 0;
1308 }
1309
1310 /* id(tuple) isn't in the memo now. If it shows up there after
1311 * saving the tuple elements, the tuple must be recursive, in
1312 * which case we'll pop everything we put on the stack, and fetch
1313 * its value from the memo.
1314 */
1315 memo_key = PyLong_FromVoidPtr(obj);
1316 if (memo_key == NULL)
1317 return -1;
1318
1319 if (len <= 3 && self->proto >= 2) {
1320 /* Use TUPLE{1,2,3} opcodes. */
1321 if (store_tuple_elements(self, obj, len) < 0)
1322 goto error;
1323
1324 if (PyDict_GetItem(self->memo, memo_key)) {
1325 /* pop the len elements */
1326 for (i = 0; i < len; i++)
1327 if (pickler_write(self, &pop_op, 1) < 0)
1328 goto error;
1329 /* fetch from memo */
1330 if (memo_get(self, memo_key) < 0)
1331 goto error;
1332
1333 Py_DECREF(memo_key);
1334 return 0;
1335 }
1336 else { /* Not recursive. */
1337 if (pickler_write(self, len2opcode + len, 1) < 0)
1338 goto error;
1339 }
1340 goto memoize;
1341 }
1342
1343 /* proto < 2 and len > 0, or proto >= 2 and len > 3.
1344 * Generate MARK e1 e2 ... TUPLE
1345 */
1346 if (pickler_write(self, &mark_op, 1) < 0)
1347 goto error;
1348
1349 if (store_tuple_elements(self, obj, len) < 0)
1350 goto error;
1351
1352 if (PyDict_GetItem(self->memo, memo_key)) {
1353 /* pop the stack stuff we pushed */
1354 if (self->bin) {
1355 if (pickler_write(self, &pop_mark_op, 1) < 0)
1356 goto error;
1357 }
1358 else {
1359 /* Note that we pop one more than len, to remove
1360 * the MARK too.
1361 */
1362 for (i = 0; i <= len; i++)
1363 if (pickler_write(self, &pop_op, 1) < 0)
1364 goto error;
1365 }
1366 /* fetch from memo */
1367 if (memo_get(self, memo_key) < 0)
1368 goto error;
1369
1370 Py_DECREF(memo_key);
1371 return 0;
1372 }
1373 else { /* Not recursive. */
1374 if (pickler_write(self, &tuple_op, 1) < 0)
1375 goto error;
1376 }
1377
1378 memoize:
1379 if (memo_put(self, obj) < 0)
1380 goto error;
1381
1382 if (0) {
1383 error:
1384 status = -1;
1385 }
1386
1387 Py_DECREF(memo_key);
1388 return status;
1389}
1390
1391/* iter is an iterator giving items, and we batch up chunks of
1392 * MARK item item ... item APPENDS
1393 * opcode sequences. Calling code should have arranged to first create an
1394 * empty list, or list-like object, for the APPENDS to operate on.
1395 * Returns 0 on success, <0 on error.
1396 */
1397static int
1398batch_list(PicklerObject *self, PyObject *iter)
1399{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001400 PyObject *obj = NULL;
1401 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001402 int i, n;
1403
1404 const char mark_op = MARK;
1405 const char append_op = APPEND;
1406 const char appends_op = APPENDS;
1407
1408 assert(iter != NULL);
1409
1410 /* XXX: I think this function could be made faster by avoiding the
1411 iterator interface and fetching objects directly from list using
1412 PyList_GET_ITEM.
1413 */
1414
1415 if (self->proto == 0) {
1416 /* APPENDS isn't available; do one at a time. */
1417 for (;;) {
1418 obj = PyIter_Next(iter);
1419 if (obj == NULL) {
1420 if (PyErr_Occurred())
1421 return -1;
1422 break;
1423 }
1424 i = save(self, obj, 0);
1425 Py_DECREF(obj);
1426 if (i < 0)
1427 return -1;
1428 if (pickler_write(self, &append_op, 1) < 0)
1429 return -1;
1430 }
1431 return 0;
1432 }
1433
1434 /* proto > 0: write in batches of BATCHSIZE. */
1435 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001436 /* Get first item */
1437 firstitem = PyIter_Next(iter);
1438 if (firstitem == NULL) {
1439 if (PyErr_Occurred())
1440 goto error;
1441
1442 /* nothing more to add */
1443 break;
1444 }
1445
1446 /* Try to get a second item */
1447 obj = PyIter_Next(iter);
1448 if (obj == NULL) {
1449 if (PyErr_Occurred())
1450 goto error;
1451
1452 /* Only one item to write */
1453 if (save(self, firstitem, 0) < 0)
1454 goto error;
1455 if (pickler_write(self, &append_op, 1) < 0)
1456 goto error;
1457 Py_CLEAR(firstitem);
1458 break;
1459 }
1460
1461 /* More than one item to write */
1462
1463 /* Pump out MARK, items, APPENDS. */
1464 if (pickler_write(self, &mark_op, 1) < 0)
1465 goto error;
1466
1467 if (save(self, firstitem, 0) < 0)
1468 goto error;
1469 Py_CLEAR(firstitem);
1470 n = 1;
1471
1472 /* Fetch and save up to BATCHSIZE items */
1473 while (obj) {
1474 if (save(self, obj, 0) < 0)
1475 goto error;
1476 Py_CLEAR(obj);
1477 n += 1;
1478
1479 if (n == BATCHSIZE)
1480 break;
1481
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001482 obj = PyIter_Next(iter);
1483 if (obj == NULL) {
1484 if (PyErr_Occurred())
1485 goto error;
1486 break;
1487 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001488 }
1489
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001490 if (pickler_write(self, &appends_op, 1) < 0)
1491 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001492
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001493 } while (n == BATCHSIZE);
1494 return 0;
1495
1496 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001497 Py_XDECREF(firstitem);
1498 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001499 return -1;
1500}
1501
1502static int
1503save_list(PicklerObject *self, PyObject *obj)
1504{
1505 PyObject *iter;
1506 char header[3];
1507 int len;
1508 int status = 0;
1509
1510 if (self->fast && !fast_save_enter(self, obj))
1511 goto error;
1512
1513 /* Create an empty list. */
1514 if (self->bin) {
1515 header[0] = EMPTY_LIST;
1516 len = 1;
1517 }
1518 else {
1519 header[0] = MARK;
1520 header[1] = LIST;
1521 len = 2;
1522 }
1523
1524 if (pickler_write(self, header, len) < 0)
1525 goto error;
1526
1527 /* Get list length, and bow out early if empty. */
1528 if ((len = PyList_Size(obj)) < 0)
1529 goto error;
1530
1531 if (memo_put(self, obj) < 0)
1532 goto error;
1533
1534 if (len != 0) {
1535 /* Save the list elements. */
1536 iter = PyObject_GetIter(obj);
1537 if (iter == NULL)
1538 goto error;
1539 status = batch_list(self, iter);
1540 Py_DECREF(iter);
1541 }
1542
1543 if (0) {
1544 error:
1545 status = -1;
1546 }
1547
1548 if (self->fast && !fast_save_leave(self, obj))
1549 status = -1;
1550
1551 return status;
1552}
1553
1554/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
1555 * MARK key value ... key value SETITEMS
1556 * opcode sequences. Calling code should have arranged to first create an
1557 * empty dict, or dict-like object, for the SETITEMS to operate on.
1558 * Returns 0 on success, <0 on error.
1559 *
1560 * This is very much like batch_list(). The difference between saving
1561 * elements directly, and picking apart two-tuples, is so long-winded at
1562 * the C level, though, that attempts to combine these routines were too
1563 * ugly to bear.
1564 */
1565static int
1566batch_dict(PicklerObject *self, PyObject *iter)
1567{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001568 PyObject *obj = NULL;
1569 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001570 int i, n;
1571
1572 const char mark_op = MARK;
1573 const char setitem_op = SETITEM;
1574 const char setitems_op = SETITEMS;
1575
1576 assert(iter != NULL);
1577
1578 if (self->proto == 0) {
1579 /* SETITEMS isn't available; do one at a time. */
1580 for (;;) {
1581 obj = PyIter_Next(iter);
1582 if (obj == NULL) {
1583 if (PyErr_Occurred())
1584 return -1;
1585 break;
1586 }
1587 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
1588 PyErr_SetString(PyExc_TypeError, "dict items "
1589 "iterator must return 2-tuples");
1590 return -1;
1591 }
1592 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
1593 if (i >= 0)
1594 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
1595 Py_DECREF(obj);
1596 if (i < 0)
1597 return -1;
1598 if (pickler_write(self, &setitem_op, 1) < 0)
1599 return -1;
1600 }
1601 return 0;
1602 }
1603
1604 /* proto > 0: write in batches of BATCHSIZE. */
1605 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001606 /* Get first item */
1607 firstitem = PyIter_Next(iter);
1608 if (firstitem == NULL) {
1609 if (PyErr_Occurred())
1610 goto error;
1611
1612 /* nothing more to add */
1613 break;
1614 }
1615 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
1616 PyErr_SetString(PyExc_TypeError, "dict items "
1617 "iterator must return 2-tuples");
1618 goto error;
1619 }
1620
1621 /* Try to get a second item */
1622 obj = PyIter_Next(iter);
1623 if (obj == NULL) {
1624 if (PyErr_Occurred())
1625 goto error;
1626
1627 /* Only one item to write */
1628 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
1629 goto error;
1630 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
1631 goto error;
1632 if (pickler_write(self, &setitem_op, 1) < 0)
1633 goto error;
1634 Py_CLEAR(firstitem);
1635 break;
1636 }
1637
1638 /* More than one item to write */
1639
1640 /* Pump out MARK, items, SETITEMS. */
1641 if (pickler_write(self, &mark_op, 1) < 0)
1642 goto error;
1643
1644 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
1645 goto error;
1646 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
1647 goto error;
1648 Py_CLEAR(firstitem);
1649 n = 1;
1650
1651 /* Fetch and save up to BATCHSIZE items */
1652 while (obj) {
1653 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
1654 PyErr_SetString(PyExc_TypeError, "dict items "
1655 "iterator must return 2-tuples");
1656 goto error;
1657 }
1658 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
1659 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
1660 goto error;
1661 Py_CLEAR(obj);
1662 n += 1;
1663
1664 if (n == BATCHSIZE)
1665 break;
1666
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001667 obj = PyIter_Next(iter);
1668 if (obj == NULL) {
1669 if (PyErr_Occurred())
1670 goto error;
1671 break;
1672 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001673 }
1674
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001675 if (pickler_write(self, &setitems_op, 1) < 0)
1676 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001677
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001678 } while (n == BATCHSIZE);
1679 return 0;
1680
1681 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001682 Py_XDECREF(firstitem);
1683 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001684 return -1;
1685}
1686
1687static int
1688save_dict(PicklerObject *self, PyObject *obj)
1689{
1690 PyObject *items, *iter;
1691 char header[3];
1692 int len;
1693 int status = 0;
1694
1695 if (self->fast && !fast_save_enter(self, obj))
1696 goto error;
1697
1698 /* Create an empty dict. */
1699 if (self->bin) {
1700 header[0] = EMPTY_DICT;
1701 len = 1;
1702 }
1703 else {
1704 header[0] = MARK;
1705 header[1] = DICT;
1706 len = 2;
1707 }
1708
1709 if (pickler_write(self, header, len) < 0)
1710 goto error;
1711
1712 /* Get dict size, and bow out early if empty. */
1713 if ((len = PyDict_Size(obj)) < 0)
1714 goto error;
1715
1716 if (memo_put(self, obj) < 0)
1717 goto error;
1718
1719 if (len != 0) {
1720 /* Save the dict items. */
1721 items = PyObject_CallMethod(obj, "items", "()");
1722 if (items == NULL)
1723 goto error;
1724 iter = PyObject_GetIter(items);
1725 Py_DECREF(items);
1726 if (iter == NULL)
1727 goto error;
1728 status = batch_dict(self, iter);
1729 Py_DECREF(iter);
1730 }
1731
1732 if (0) {
1733 error:
1734 status = -1;
1735 }
1736
1737 if (self->fast && !fast_save_leave(self, obj))
1738 status = -1;
1739
1740 return status;
1741}
1742
1743static int
1744save_global(PicklerObject *self, PyObject *obj, PyObject *name)
1745{
1746 static PyObject *name_str = NULL;
1747 PyObject *global_name = NULL;
1748 PyObject *module_name = NULL;
1749 PyObject *module = NULL;
1750 PyObject *cls;
1751 int status = 0;
1752
1753 const char global_op = GLOBAL;
1754
1755 if (name_str == NULL) {
1756 name_str = PyUnicode_InternFromString("__name__");
1757 if (name_str == NULL)
1758 goto error;
1759 }
1760
1761 if (name) {
1762 global_name = name;
1763 Py_INCREF(global_name);
1764 }
1765 else {
1766 global_name = PyObject_GetAttr(obj, name_str);
1767 if (global_name == NULL)
1768 goto error;
1769 }
1770
1771 module_name = whichmodule(obj, global_name);
1772 if (module_name == NULL)
1773 goto error;
1774
1775 /* XXX: Change to use the import C API directly with level=0 to disallow
1776 relative imports.
1777
1778 XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
1779 builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
1780 custom import functions (IMHO, this would be a nice security
1781 feature). The import C API would need to be extended to support the
1782 extra parameters of __import__ to fix that. */
1783 module = PyImport_Import(module_name);
1784 if (module == NULL) {
1785 PyErr_Format(PicklingError,
1786 "Can't pickle %R: import of module %R failed",
1787 obj, module_name);
1788 goto error;
1789 }
1790 cls = PyObject_GetAttr(module, global_name);
1791 if (cls == NULL) {
1792 PyErr_Format(PicklingError,
1793 "Can't pickle %R: attribute lookup %S.%S failed",
1794 obj, module_name, global_name);
1795 goto error;
1796 }
1797 if (cls != obj) {
1798 Py_DECREF(cls);
1799 PyErr_Format(PicklingError,
1800 "Can't pickle %R: it's not the same object as %S.%S",
1801 obj, module_name, global_name);
1802 goto error;
1803 }
1804 Py_DECREF(cls);
1805
1806 if (self->proto >= 2) {
1807 /* See whether this is in the extension registry, and if
1808 * so generate an EXT opcode.
1809 */
1810 PyObject *code_obj; /* extension code as Python object */
1811 long code; /* extension code as C value */
1812 char pdata[5];
1813 int n;
1814
1815 PyTuple_SET_ITEM(two_tuple, 0, module_name);
1816 PyTuple_SET_ITEM(two_tuple, 1, global_name);
1817 code_obj = PyDict_GetItem(extension_registry, two_tuple);
1818 /* The object is not registered in the extension registry.
1819 This is the most likely code path. */
1820 if (code_obj == NULL)
1821 goto gen_global;
1822
1823 /* XXX: pickle.py doesn't check neither the type, nor the range
1824 of the value returned by the extension_registry. It should for
1825 consistency. */
1826
1827 /* Verify code_obj has the right type and value. */
1828 if (!PyLong_Check(code_obj)) {
1829 PyErr_Format(PicklingError,
1830 "Can't pickle %R: extension code %R isn't an integer",
1831 obj, code_obj);
1832 goto error;
1833 }
1834 code = PyLong_AS_LONG(code_obj);
1835 if (code <= 0 || code > 0x7fffffffL) {
1836 PyErr_Format(PicklingError,
1837 "Can't pickle %R: extension code %ld is out of range",
1838 obj, code);
1839 goto error;
1840 }
1841
1842 /* Generate an EXT opcode. */
1843 if (code <= 0xff) {
1844 pdata[0] = EXT1;
1845 pdata[1] = (unsigned char)code;
1846 n = 2;
1847 }
1848 else if (code <= 0xffff) {
1849 pdata[0] = EXT2;
1850 pdata[1] = (unsigned char)(code & 0xff);
1851 pdata[2] = (unsigned char)((code >> 8) & 0xff);
1852 n = 3;
1853 }
1854 else {
1855 pdata[0] = EXT4;
1856 pdata[1] = (unsigned char)(code & 0xff);
1857 pdata[2] = (unsigned char)((code >> 8) & 0xff);
1858 pdata[3] = (unsigned char)((code >> 16) & 0xff);
1859 pdata[4] = (unsigned char)((code >> 24) & 0xff);
1860 n = 5;
1861 }
1862
1863 if (pickler_write(self, pdata, n) < 0)
1864 goto error;
1865 }
1866 else {
1867 /* Generate a normal global opcode if we are using a pickle
1868 protocol <= 2, or if the object is not registered in the
1869 extension registry. */
1870 PyObject *encoded;
1871 PyObject *(*unicode_encoder)(PyObject *);
1872
1873 gen_global:
1874 if (pickler_write(self, &global_op, 1) < 0)
1875 goto error;
1876
1877 /* Since Python 3.0 now supports non-ASCII identifiers, we encode both
1878 the module name and the global name using UTF-8. We do so only when
1879 we are using the pickle protocol newer than version 3. This is to
1880 ensure compatibility with older Unpickler running on Python 2.x. */
1881 if (self->proto >= 3) {
1882 unicode_encoder = PyUnicode_AsUTF8String;
1883 }
1884 else {
1885 unicode_encoder = PyUnicode_AsASCIIString;
1886 }
1887
1888 /* Save the name of the module. */
1889 encoded = unicode_encoder(module_name);
1890 if (encoded == NULL) {
1891 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
1892 PyErr_Format(PicklingError,
1893 "can't pickle module identifier '%S' using "
1894 "pickle protocol %i", module_name, self->proto);
1895 goto error;
1896 }
1897 if (pickler_write(self, PyBytes_AS_STRING(encoded),
1898 PyBytes_GET_SIZE(encoded)) < 0) {
1899 Py_DECREF(encoded);
1900 goto error;
1901 }
1902 Py_DECREF(encoded);
1903 if(pickler_write(self, "\n", 1) < 0)
1904 goto error;
1905
1906 /* Save the name of the module. */
1907 encoded = unicode_encoder(global_name);
1908 if (encoded == NULL) {
1909 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
1910 PyErr_Format(PicklingError,
1911 "can't pickle global identifier '%S' using "
1912 "pickle protocol %i", global_name, self->proto);
1913 goto error;
1914 }
1915 if (pickler_write(self, PyBytes_AS_STRING(encoded),
1916 PyBytes_GET_SIZE(encoded)) < 0) {
1917 Py_DECREF(encoded);
1918 goto error;
1919 }
1920 Py_DECREF(encoded);
1921 if(pickler_write(self, "\n", 1) < 0)
1922 goto error;
1923
1924 /* Memoize the object. */
1925 if (memo_put(self, obj) < 0)
1926 goto error;
1927 }
1928
1929 if (0) {
1930 error:
1931 status = -1;
1932 }
1933 Py_XDECREF(module_name);
1934 Py_XDECREF(global_name);
1935 Py_XDECREF(module);
1936
1937 return status;
1938}
1939
1940static int
1941save_pers(PicklerObject *self, PyObject *obj, PyObject *func)
1942{
1943 PyObject *pid = NULL;
1944 int status = 0;
1945
1946 const char persid_op = PERSID;
1947 const char binpersid_op = BINPERSID;
1948
1949 Py_INCREF(obj);
1950 pid = pickler_call(self, func, obj);
1951 if (pid == NULL)
1952 return -1;
1953
1954 if (pid != Py_None) {
1955 if (self->bin) {
1956 if (save(self, pid, 1) < 0 ||
1957 pickler_write(self, &binpersid_op, 1) < 0)
1958 goto error;
1959 }
1960 else {
1961 PyObject *pid_str = NULL;
1962 char *pid_ascii_bytes;
1963 Py_ssize_t size;
1964
1965 pid_str = PyObject_Str(pid);
1966 if (pid_str == NULL)
1967 goto error;
1968
1969 /* XXX: Should it check whether the persistent id only contains
1970 ASCII characters? And what if the pid contains embedded
1971 newlines? */
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00001972 pid_ascii_bytes = _PyUnicode_AsStringAndSize(pid_str, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001973 Py_DECREF(pid_str);
1974 if (pid_ascii_bytes == NULL)
1975 goto error;
1976
1977 if (pickler_write(self, &persid_op, 1) < 0 ||
1978 pickler_write(self, pid_ascii_bytes, size) < 0 ||
1979 pickler_write(self, "\n", 1) < 0)
1980 goto error;
1981 }
1982 status = 1;
1983 }
1984
1985 if (0) {
1986 error:
1987 status = -1;
1988 }
1989 Py_XDECREF(pid);
1990
1991 return status;
1992}
1993
1994/* We're saving obj, and args is the 2-thru-5 tuple returned by the
1995 * appropriate __reduce__ method for obj.
1996 */
1997static int
1998save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
1999{
2000 PyObject *callable;
2001 PyObject *argtup;
2002 PyObject *state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002003 PyObject *listitems = Py_None;
2004 PyObject *dictitems = Py_None;
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002005 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002006
2007 int use_newobj = self->proto >= 2;
2008
2009 const char reduce_op = REDUCE;
2010 const char build_op = BUILD;
2011 const char newobj_op = NEWOBJ;
2012
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002013 size = PyTuple_Size(args);
2014 if (size < 2 || size > 5) {
2015 PyErr_SetString(PicklingError, "tuple returned by "
2016 "__reduce__ must contain 2 through 5 elements");
2017 return -1;
2018 }
2019
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002020 if (!PyArg_UnpackTuple(args, "save_reduce", 2, 5,
2021 &callable, &argtup, &state, &listitems, &dictitems))
2022 return -1;
2023
2024 if (!PyCallable_Check(callable)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002025 PyErr_SetString(PicklingError, "first item of the tuple "
2026 "returned by __reduce__ must be callable");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002027 return -1;
2028 }
2029 if (!PyTuple_Check(argtup)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002030 PyErr_SetString(PicklingError, "second item of the tuple "
2031 "returned by __reduce__ must be a tuple");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002032 return -1;
2033 }
2034
2035 if (state == Py_None)
2036 state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002037
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002038 if (listitems == Py_None)
2039 listitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002040 else if (!PyIter_Check(listitems)) {
2041 PyErr_Format(PicklingError, "Fourth element of tuple"
2042 "returned by __reduce__ must be an iterator, not %s",
2043 Py_TYPE(listitems)->tp_name);
2044 return -1;
2045 }
2046
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002047 if (dictitems == Py_None)
2048 dictitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002049 else if (!PyIter_Check(dictitems)) {
2050 PyErr_Format(PicklingError, "Fifth element of tuple"
2051 "returned by __reduce__ must be an iterator, not %s",
2052 Py_TYPE(dictitems)->tp_name);
2053 return -1;
2054 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002055
2056 /* Protocol 2 special case: if callable's name is __newobj__, use
2057 NEWOBJ. */
2058 if (use_newobj) {
2059 static PyObject *newobj_str = NULL;
2060 PyObject *name_str;
2061
2062 if (newobj_str == NULL) {
2063 newobj_str = PyUnicode_InternFromString("__newobj__");
2064 }
2065
2066 name_str = PyObject_GetAttrString(callable, "__name__");
2067 if (name_str == NULL) {
2068 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2069 PyErr_Clear();
2070 else
2071 return -1;
2072 use_newobj = 0;
2073 }
2074 else {
2075 use_newobj = PyUnicode_Check(name_str) &&
2076 PyUnicode_Compare(name_str, newobj_str) == 0;
2077 Py_DECREF(name_str);
2078 }
2079 }
2080 if (use_newobj) {
2081 PyObject *cls;
2082 PyObject *newargtup;
2083 PyObject *obj_class;
2084 int p;
2085
2086 /* Sanity checks. */
2087 if (Py_SIZE(argtup) < 1) {
2088 PyErr_SetString(PicklingError, "__newobj__ arglist is empty");
2089 return -1;
2090 }
2091
2092 cls = PyTuple_GET_ITEM(argtup, 0);
2093 if (!PyObject_HasAttrString(cls, "__new__")) {
2094 PyErr_SetString(PicklingError, "args[0] from "
2095 "__newobj__ args has no __new__");
2096 return -1;
2097 }
2098
2099 if (obj != NULL) {
2100 obj_class = PyObject_GetAttrString(obj, "__class__");
2101 if (obj_class == NULL) {
2102 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2103 PyErr_Clear();
2104 else
2105 return -1;
2106 }
2107 p = obj_class != cls; /* true iff a problem */
2108 Py_DECREF(obj_class);
2109 if (p) {
2110 PyErr_SetString(PicklingError, "args[0] from "
2111 "__newobj__ args has the wrong class");
2112 return -1;
2113 }
2114 }
2115 /* XXX: These calls save() are prone to infinite recursion. Imagine
2116 what happen if the value returned by the __reduce__() method of
2117 some extension type contains another object of the same type. Ouch!
2118
2119 Here is a quick example, that I ran into, to illustrate what I
2120 mean:
2121
2122 >>> import pickle, copyreg
2123 >>> copyreg.dispatch_table.pop(complex)
2124 >>> pickle.dumps(1+2j)
2125 Traceback (most recent call last):
2126 ...
2127 RuntimeError: maximum recursion depth exceeded
2128
2129 Removing the complex class from copyreg.dispatch_table made the
2130 __reduce_ex__() method emit another complex object:
2131
2132 >>> (1+1j).__reduce_ex__(2)
2133 (<function __newobj__ at 0xb7b71c3c>,
2134 (<class 'complex'>, (1+1j)), None, None, None)
2135
2136 Thus when save() was called on newargstup (the 2nd item) recursion
2137 ensued. Of course, the bug was in the complex class which had a
2138 broken __getnewargs__() that emitted another complex object. But,
2139 the point, here, is it is quite easy to end up with a broken reduce
2140 function. */
2141
2142 /* Save the class and its __new__ arguments. */
2143 if (save(self, cls, 0) < 0)
2144 return -1;
2145
2146 newargtup = PyTuple_GetSlice(argtup, 1, Py_SIZE(argtup));
2147 if (newargtup == NULL)
2148 return -1;
2149
2150 p = save(self, newargtup, 0);
2151 Py_DECREF(newargtup);
2152 if (p < 0)
2153 return -1;
2154
2155 /* Add NEWOBJ opcode. */
2156 if (pickler_write(self, &newobj_op, 1) < 0)
2157 return -1;
2158 }
2159 else { /* Not using NEWOBJ. */
2160 if (save(self, callable, 0) < 0 ||
2161 save(self, argtup, 0) < 0 ||
2162 pickler_write(self, &reduce_op, 1) < 0)
2163 return -1;
2164 }
2165
2166 /* obj can be NULL when save_reduce() is used directly. A NULL obj means
2167 the caller do not want to memoize the object. Not particularly useful,
2168 but that is to mimic the behavior save_reduce() in pickle.py when
2169 obj is None. */
2170 if (obj && memo_put(self, obj) < 0)
2171 return -1;
2172
2173 if (listitems && batch_list(self, listitems) < 0)
2174 return -1;
2175
2176 if (dictitems && batch_dict(self, dictitems) < 0)
2177 return -1;
2178
2179 if (state) {
2180 if (save(self, state, 0) < 0 ||
2181 pickler_write(self, &build_op, 1) < 0)
2182 return -1;
2183 }
2184
2185 return 0;
2186}
2187
2188static int
2189save(PicklerObject *self, PyObject *obj, int pers_save)
2190{
2191 PyTypeObject *type;
2192 PyObject *reduce_func = NULL;
2193 PyObject *reduce_value = NULL;
2194 PyObject *memo_key = NULL;
2195 int status = 0;
2196
Alexandre Vassalottidff18342008-07-13 18:48:30 +00002197 if (Py_EnterRecursiveCall(" while pickling an object") < 0)
2198 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002199
2200 /* The extra pers_save argument is necessary to avoid calling save_pers()
2201 on its returned object. */
2202 if (!pers_save && self->pers_func) {
2203 /* save_pers() returns:
2204 -1 to signal an error;
2205 0 if it did nothing successfully;
2206 1 if a persistent id was saved.
2207 */
2208 if ((status = save_pers(self, obj, self->pers_func)) != 0)
2209 goto done;
2210 }
2211
2212 type = Py_TYPE(obj);
2213
2214 /* XXX: The old cPickle had an optimization that used switch-case
2215 statement dispatching on the first letter of the type name. It was
2216 probably not a bad idea after all. If benchmarks shows that particular
2217 optimization had some real benefits, it would be nice to add it
2218 back. */
2219
2220 /* Atom types; these aren't memoized, so don't check the memo. */
2221
2222 if (obj == Py_None) {
2223 status = save_none(self, obj);
2224 goto done;
2225 }
2226 else if (obj == Py_False || obj == Py_True) {
2227 status = save_bool(self, obj);
2228 goto done;
2229 }
2230 else if (type == &PyLong_Type) {
2231 status = save_long(self, obj);
2232 goto done;
2233 }
2234 else if (type == &PyFloat_Type) {
2235 status = save_float(self, obj);
2236 goto done;
2237 }
2238
2239 /* Check the memo to see if it has the object. If so, generate
2240 a GET (or BINGET) opcode, instead of pickling the object
2241 once again. */
2242 memo_key = PyLong_FromVoidPtr(obj);
2243 if (memo_key == NULL)
2244 goto error;
2245 if (PyDict_GetItem(self->memo, memo_key)) {
2246 if (memo_get(self, memo_key) < 0)
2247 goto error;
2248 goto done;
2249 }
2250
2251 if (type == &PyBytes_Type) {
2252 status = save_bytes(self, obj);
2253 goto done;
2254 }
2255 else if (type == &PyUnicode_Type) {
2256 status = save_unicode(self, obj);
2257 goto done;
2258 }
2259 else if (type == &PyDict_Type) {
2260 status = save_dict(self, obj);
2261 goto done;
2262 }
2263 else if (type == &PyList_Type) {
2264 status = save_list(self, obj);
2265 goto done;
2266 }
2267 else if (type == &PyTuple_Type) {
2268 status = save_tuple(self, obj);
2269 goto done;
2270 }
2271 else if (type == &PyType_Type) {
2272 status = save_global(self, obj, NULL);
2273 goto done;
2274 }
2275 else if (type == &PyFunction_Type) {
2276 status = save_global(self, obj, NULL);
2277 if (status < 0 && PyErr_ExceptionMatches(PickleError)) {
2278 /* fall back to reduce */
2279 PyErr_Clear();
2280 }
2281 else {
2282 goto done;
2283 }
2284 }
2285 else if (type == &PyCFunction_Type) {
2286 status = save_global(self, obj, NULL);
2287 goto done;
2288 }
2289 else if (PyType_IsSubtype(type, &PyType_Type)) {
2290 status = save_global(self, obj, NULL);
2291 goto done;
2292 }
2293
2294 /* XXX: This part needs some unit tests. */
2295
2296 /* Get a reduction callable, and call it. This may come from
2297 * copyreg.dispatch_table, the object's __reduce_ex__ method,
2298 * or the object's __reduce__ method.
2299 */
2300 reduce_func = PyDict_GetItem(dispatch_table, (PyObject *)type);
2301 if (reduce_func != NULL) {
2302 /* Here, the reference count of the reduce_func object returned by
2303 PyDict_GetItem needs to be increased to be consistent with the one
2304 returned by PyObject_GetAttr. This is allow us to blindly DECREF
2305 reduce_func at the end of the save() routine.
2306 */
2307 Py_INCREF(reduce_func);
2308 Py_INCREF(obj);
2309 reduce_value = pickler_call(self, reduce_func, obj);
2310 }
2311 else {
2312 static PyObject *reduce_str = NULL;
2313 static PyObject *reduce_ex_str = NULL;
2314
2315 /* Cache the name of the reduce methods. */
2316 if (reduce_str == NULL) {
2317 reduce_str = PyUnicode_InternFromString("__reduce__");
2318 if (reduce_str == NULL)
2319 goto error;
2320 reduce_ex_str = PyUnicode_InternFromString("__reduce_ex__");
2321 if (reduce_ex_str == NULL)
2322 goto error;
2323 }
2324
2325 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
2326 automatically defined as __reduce__. While this is convenient, this
2327 make it impossible to know which method was actually called. Of
2328 course, this is not a big deal. But still, it would be nice to let
2329 the user know which method was called when something go
2330 wrong. Incidentally, this means if __reduce_ex__ is not defined, we
2331 don't actually have to check for a __reduce__ method. */
2332
2333 /* Check for a __reduce_ex__ method. */
2334 reduce_func = PyObject_GetAttr(obj, reduce_ex_str);
2335 if (reduce_func != NULL) {
2336 PyObject *proto;
2337 proto = PyLong_FromLong(self->proto);
2338 if (proto != NULL) {
2339 reduce_value = pickler_call(self, reduce_func, proto);
2340 }
2341 }
2342 else {
2343 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2344 PyErr_Clear();
2345 else
2346 goto error;
2347 /* Check for a __reduce__ method. */
2348 reduce_func = PyObject_GetAttr(obj, reduce_str);
2349 if (reduce_func != NULL) {
2350 reduce_value = PyObject_Call(reduce_func, empty_tuple, NULL);
2351 }
2352 else {
2353 PyErr_Format(PicklingError, "can't pickle '%.200s' object: %R",
2354 type->tp_name, obj);
2355 goto error;
2356 }
2357 }
2358 }
2359
2360 if (reduce_value == NULL)
2361 goto error;
2362
2363 if (PyUnicode_Check(reduce_value)) {
2364 status = save_global(self, obj, reduce_value);
2365 goto done;
2366 }
2367
2368 if (!PyTuple_Check(reduce_value)) {
2369 PyErr_SetString(PicklingError,
2370 "__reduce__ must return a string or tuple");
2371 goto error;
2372 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002373
2374 status = save_reduce(self, reduce_value, obj);
2375
2376 if (0) {
2377 error:
2378 status = -1;
2379 }
2380 done:
Alexandre Vassalottidff18342008-07-13 18:48:30 +00002381 Py_LeaveRecursiveCall();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002382 Py_XDECREF(memo_key);
2383 Py_XDECREF(reduce_func);
2384 Py_XDECREF(reduce_value);
2385
2386 return status;
2387}
2388
2389static int
2390dump(PicklerObject *self, PyObject *obj)
2391{
2392 const char stop_op = STOP;
2393
2394 if (self->proto >= 2) {
2395 char header[2];
2396
2397 header[0] = PROTO;
2398 assert(self->proto >= 0 && self->proto < 256);
2399 header[1] = (unsigned char)self->proto;
2400 if (pickler_write(self, header, 2) < 0)
2401 return -1;
2402 }
2403
2404 if (save(self, obj, 0) < 0 ||
2405 pickler_write(self, &stop_op, 1) < 0 ||
2406 pickler_write(self, NULL, 0) < 0)
2407 return -1;
2408
2409 return 0;
2410}
2411
2412PyDoc_STRVAR(Pickler_clear_memo_doc,
2413"clear_memo() -> None. Clears the pickler's \"memo\"."
2414"\n"
2415"The memo is the data structure that remembers which objects the\n"
2416"pickler has already seen, so that shared or recursive objects are\n"
2417"pickled by reference and not by value. This method is useful when\n"
2418"re-using picklers.");
2419
2420static PyObject *
2421Pickler_clear_memo(PicklerObject *self)
2422{
2423 if (self->memo)
2424 PyDict_Clear(self->memo);
2425
2426 Py_RETURN_NONE;
2427}
2428
2429PyDoc_STRVAR(Pickler_dump_doc,
2430"dump(obj) -> None. Write a pickled representation of obj to the open file.");
2431
2432static PyObject *
2433Pickler_dump(PicklerObject *self, PyObject *args)
2434{
2435 PyObject *obj;
2436
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00002437 /* Check whether the Pickler was initialized correctly (issue3664).
2438 Developers often forget to call __init__() in their subclasses, which
2439 would trigger a segfault without this check. */
2440 if (self->write == NULL) {
2441 PyErr_Format(PicklingError,
2442 "Pickler.__init__() was not called by %s.__init__()",
2443 Py_TYPE(self)->tp_name);
2444 return NULL;
2445 }
2446
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002447 if (!PyArg_ParseTuple(args, "O:dump", &obj))
2448 return NULL;
2449
2450 if (dump(self, obj) < 0)
2451 return NULL;
2452
2453 Py_RETURN_NONE;
2454}
2455
2456static struct PyMethodDef Pickler_methods[] = {
2457 {"dump", (PyCFunction)Pickler_dump, METH_VARARGS,
2458 Pickler_dump_doc},
2459 {"clear_memo", (PyCFunction)Pickler_clear_memo, METH_NOARGS,
2460 Pickler_clear_memo_doc},
2461 {NULL, NULL} /* sentinel */
2462};
2463
2464static void
2465Pickler_dealloc(PicklerObject *self)
2466{
2467 PyObject_GC_UnTrack(self);
2468
2469 Py_XDECREF(self->write);
2470 Py_XDECREF(self->memo);
2471 Py_XDECREF(self->pers_func);
2472 Py_XDECREF(self->arg);
2473 Py_XDECREF(self->fast_memo);
2474
2475 PyMem_Free(self->write_buf);
2476
2477 Py_TYPE(self)->tp_free((PyObject *)self);
2478}
2479
2480static int
2481Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
2482{
2483 Py_VISIT(self->write);
2484 Py_VISIT(self->memo);
2485 Py_VISIT(self->pers_func);
2486 Py_VISIT(self->arg);
2487 Py_VISIT(self->fast_memo);
2488 return 0;
2489}
2490
2491static int
2492Pickler_clear(PicklerObject *self)
2493{
2494 Py_CLEAR(self->write);
2495 Py_CLEAR(self->memo);
2496 Py_CLEAR(self->pers_func);
2497 Py_CLEAR(self->arg);
2498 Py_CLEAR(self->fast_memo);
2499
2500 PyMem_Free(self->write_buf);
2501 self->write_buf = NULL;
2502
2503 return 0;
2504}
2505
2506PyDoc_STRVAR(Pickler_doc,
2507"Pickler(file, protocol=None)"
2508"\n"
2509"This takes a binary file for writing a pickle data stream.\n"
2510"\n"
2511"The optional protocol argument tells the pickler to use the\n"
2512"given protocol; supported protocols are 0, 1, 2, 3. The default\n"
2513"protocol is 3; a backward-incompatible protocol designed for\n"
2514"Python 3.0.\n"
2515"\n"
2516"Specifying a negative protocol version selects the highest\n"
2517"protocol version supported. The higher the protocol used, the\n"
2518"more recent the version of Python needed to read the pickle\n"
2519"produced.\n"
2520"\n"
2521"The file argument must have a write() method that accepts a single\n"
2522"bytes argument. It can thus be a file object opened for binary\n"
2523"writing, a io.BytesIO instance, or any other custom object that\n"
2524"meets this interface.\n");
2525
2526static int
2527Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
2528{
2529 static char *kwlist[] = {"file", "protocol", 0};
2530 PyObject *file;
2531 PyObject *proto_obj = NULL;
2532 long proto = 0;
2533
2534 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:Pickler",
2535 kwlist, &file, &proto_obj))
2536 return -1;
2537
2538 /* In case of multiple __init__() calls, clear previous content. */
2539 if (self->write != NULL)
2540 (void)Pickler_clear(self);
2541
2542 if (proto_obj == NULL || proto_obj == Py_None)
2543 proto = DEFAULT_PROTOCOL;
2544 else
2545 proto = PyLong_AsLong(proto_obj);
2546
2547 if (proto < 0)
2548 proto = HIGHEST_PROTOCOL;
2549 if (proto > HIGHEST_PROTOCOL) {
2550 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
2551 HIGHEST_PROTOCOL);
2552 return -1;
2553 }
2554
2555 self->proto = proto;
2556 self->bin = proto > 0;
2557 self->arg = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002558 self->fast = 0;
2559 self->fast_nesting = 0;
2560 self->fast_memo = NULL;
2561
2562 if (!PyObject_HasAttrString(file, "write")) {
2563 PyErr_SetString(PyExc_TypeError,
2564 "file must have a 'write' attribute");
2565 return -1;
2566 }
2567 self->write = PyObject_GetAttrString(file, "write");
2568 if (self->write == NULL)
2569 return -1;
2570 self->buf_size = 0;
2571 self->write_buf = (char *)PyMem_Malloc(WRITE_BUF_SIZE);
2572 if (self->write_buf == NULL) {
2573 PyErr_NoMemory();
2574 return -1;
2575 }
2576 self->pers_func = NULL;
2577 if (PyObject_HasAttrString((PyObject *)self, "persistent_id")) {
2578 self->pers_func = PyObject_GetAttrString((PyObject *)self,
2579 "persistent_id");
2580 if (self->pers_func == NULL)
2581 return -1;
2582 }
2583 self->memo = PyDict_New();
2584 if (self->memo == NULL)
2585 return -1;
2586
2587 return 0;
2588}
2589
2590static PyObject *
2591Pickler_get_memo(PicklerObject *self)
2592{
2593 if (self->memo == NULL)
2594 PyErr_SetString(PyExc_AttributeError, "memo");
2595 else
2596 Py_INCREF(self->memo);
2597 return self->memo;
2598}
2599
2600static int
2601Pickler_set_memo(PicklerObject *self, PyObject *value)
2602{
2603 PyObject *tmp;
2604
2605 if (value == NULL) {
2606 PyErr_SetString(PyExc_TypeError,
2607 "attribute deletion is not supported");
2608 return -1;
2609 }
2610 if (!PyDict_Check(value)) {
2611 PyErr_SetString(PyExc_TypeError, "memo must be a dictionary");
2612 return -1;
2613 }
2614
2615 tmp = self->memo;
2616 Py_INCREF(value);
2617 self->memo = value;
2618 Py_XDECREF(tmp);
2619
2620 return 0;
2621}
2622
2623static PyObject *
2624Pickler_get_persid(PicklerObject *self)
2625{
2626 if (self->pers_func == NULL)
2627 PyErr_SetString(PyExc_AttributeError, "persistent_id");
2628 else
2629 Py_INCREF(self->pers_func);
2630 return self->pers_func;
2631}
2632
2633static int
2634Pickler_set_persid(PicklerObject *self, PyObject *value)
2635{
2636 PyObject *tmp;
2637
2638 if (value == NULL) {
2639 PyErr_SetString(PyExc_TypeError,
2640 "attribute deletion is not supported");
2641 return -1;
2642 }
2643 if (!PyCallable_Check(value)) {
2644 PyErr_SetString(PyExc_TypeError,
2645 "persistent_id must be a callable taking one argument");
2646 return -1;
2647 }
2648
2649 tmp = self->pers_func;
2650 Py_INCREF(value);
2651 self->pers_func = value;
2652 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
2653
2654 return 0;
2655}
2656
2657static PyMemberDef Pickler_members[] = {
2658 {"bin", T_INT, offsetof(PicklerObject, bin)},
2659 {"fast", T_INT, offsetof(PicklerObject, fast)},
2660 {NULL}
2661};
2662
2663static PyGetSetDef Pickler_getsets[] = {
2664 {"memo", (getter)Pickler_get_memo,
2665 (setter)Pickler_set_memo},
2666 {"persistent_id", (getter)Pickler_get_persid,
2667 (setter)Pickler_set_persid},
2668 {NULL}
2669};
2670
2671static PyTypeObject Pickler_Type = {
2672 PyVarObject_HEAD_INIT(NULL, 0)
2673 "_pickle.Pickler" , /*tp_name*/
2674 sizeof(PicklerObject), /*tp_basicsize*/
2675 0, /*tp_itemsize*/
2676 (destructor)Pickler_dealloc, /*tp_dealloc*/
2677 0, /*tp_print*/
2678 0, /*tp_getattr*/
2679 0, /*tp_setattr*/
2680 0, /*tp_compare*/
2681 0, /*tp_repr*/
2682 0, /*tp_as_number*/
2683 0, /*tp_as_sequence*/
2684 0, /*tp_as_mapping*/
2685 0, /*tp_hash*/
2686 0, /*tp_call*/
2687 0, /*tp_str*/
2688 0, /*tp_getattro*/
2689 0, /*tp_setattro*/
2690 0, /*tp_as_buffer*/
2691 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
2692 Pickler_doc, /*tp_doc*/
2693 (traverseproc)Pickler_traverse, /*tp_traverse*/
2694 (inquiry)Pickler_clear, /*tp_clear*/
2695 0, /*tp_richcompare*/
2696 0, /*tp_weaklistoffset*/
2697 0, /*tp_iter*/
2698 0, /*tp_iternext*/
2699 Pickler_methods, /*tp_methods*/
2700 Pickler_members, /*tp_members*/
2701 Pickler_getsets, /*tp_getset*/
2702 0, /*tp_base*/
2703 0, /*tp_dict*/
2704 0, /*tp_descr_get*/
2705 0, /*tp_descr_set*/
2706 0, /*tp_dictoffset*/
2707 (initproc)Pickler_init, /*tp_init*/
2708 PyType_GenericAlloc, /*tp_alloc*/
2709 PyType_GenericNew, /*tp_new*/
2710 PyObject_GC_Del, /*tp_free*/
2711 0, /*tp_is_gc*/
2712};
2713
2714/* Temporary helper for calling self.find_class().
2715
2716 XXX: It would be nice to able to avoid Python function call overhead, by
2717 using directly the C version of find_class(), when find_class() is not
2718 overridden by a subclass. Although, this could become rather hackish. A
2719 simpler optimization would be to call the C function when self is not a
2720 subclass instance. */
2721static PyObject *
2722find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
2723{
2724 return PyObject_CallMethod((PyObject *)self, "find_class", "OO",
2725 module_name, global_name);
2726}
2727
2728static int
2729marker(UnpicklerObject *self)
2730{
2731 if (self->num_marks < 1) {
2732 PyErr_SetString(UnpicklingError, "could not find MARK");
2733 return -1;
2734 }
2735
2736 return self->marks[--self->num_marks];
2737}
2738
2739static int
2740load_none(UnpicklerObject *self)
2741{
2742 PDATA_APPEND(self->stack, Py_None, -1);
2743 return 0;
2744}
2745
2746static int
2747bad_readline(void)
2748{
2749 PyErr_SetString(UnpicklingError, "pickle data was truncated");
2750 return -1;
2751}
2752
2753static int
2754load_int(UnpicklerObject *self)
2755{
2756 PyObject *value;
2757 char *endptr, *s;
2758 Py_ssize_t len;
2759 long x;
2760
2761 if ((len = unpickler_readline(self, &s)) < 0)
2762 return -1;
2763 if (len < 2)
2764 return bad_readline();
2765
2766 errno = 0;
2767 /* XXX: Should the base argument of strtol() be explicitly set to 10? */
2768 x = strtol(s, &endptr, 0);
2769
2770 if (errno || (*endptr != '\n') || (endptr[1] != '\0')) {
2771 /* Hm, maybe we've got something long. Let's try reading
2772 * it as a Python long object. */
2773 errno = 0;
2774 /* XXX: Same thing about the base here. */
2775 value = PyLong_FromString(s, NULL, 0);
2776 if (value == NULL) {
2777 PyErr_SetString(PyExc_ValueError,
2778 "could not convert string to int");
2779 return -1;
2780 }
2781 }
2782 else {
2783 if (len == 3 && (x == 0 || x == 1)) {
2784 if ((value = PyBool_FromLong(x)) == NULL)
2785 return -1;
2786 }
2787 else {
2788 if ((value = PyLong_FromLong(x)) == NULL)
2789 return -1;
2790 }
2791 }
2792
2793 PDATA_PUSH(self->stack, value, -1);
2794 return 0;
2795}
2796
2797static int
2798load_bool(UnpicklerObject *self, PyObject *boolean)
2799{
2800 assert(boolean == Py_True || boolean == Py_False);
2801 PDATA_APPEND(self->stack, boolean, -1);
2802 return 0;
2803}
2804
/* Decode the first 'size' bytes of 'bytes' as a little-endian integer and
 * return its value as a C long.
 *
 * Obscure: when size is 1 or 2 the value is unsigned, but when size is 4
 * (BININT, more accurately BININT4) it is a signed 32-bit two's complement
 * value.  This is an historical source of cross-platform bugs.
 *
 * The bytes are accumulated in an unsigned long so that placing a byte
 * >= 0x80 in bits 24..31 is well defined even where long is only 32 bits
 * wide; shifting a signed long into its sign bit is undefined behaviour.
 */
static long
calc_binint(char *bytes, int size)
{
    unsigned char *s = (unsigned char *)bytes;
    unsigned long ux = 0;
    int i;

    for (i = 0; i < size; i++) {
        ux |= (unsigned long)s[i] << (i * 8);
    }

    /* Sign-extend a 4-byte value by hand: for a negative two's complement
     * number v, -v - 1 == ~v, so the result is -(~v & 0x7FFFFFFF) - 1.
     * This gives the same answer for 32-bit and wider longs and never
     * converts an out-of-range unsigned value to long.
     */
    if (size == 4 && (ux & 0x80000000UL) != 0) {
        return -(long)(~ux & 0x7FFFFFFFUL) - 1;
    }

    return (long)ux;
}
2831
2832static int
2833load_binintx(UnpicklerObject *self, char *s, int size)
2834{
2835 PyObject *value;
2836 long x;
2837
2838 x = calc_binint(s, size);
2839
2840 if ((value = PyLong_FromLong(x)) == NULL)
2841 return -1;
2842
2843 PDATA_PUSH(self->stack, value, -1);
2844 return 0;
2845}
2846
2847static int
2848load_binint(UnpicklerObject *self)
2849{
2850 char *s;
2851
2852 if (unpickler_read(self, &s, 4) < 0)
2853 return -1;
2854
2855 return load_binintx(self, s, 4);
2856}
2857
2858static int
2859load_binint1(UnpicklerObject *self)
2860{
2861 char *s;
2862
2863 if (unpickler_read(self, &s, 1) < 0)
2864 return -1;
2865
2866 return load_binintx(self, s, 1);
2867}
2868
2869static int
2870load_binint2(UnpicklerObject *self)
2871{
2872 char *s;
2873
2874 if (unpickler_read(self, &s, 2) < 0)
2875 return -1;
2876
2877 return load_binintx(self, s, 2);
2878}
2879
2880static int
2881load_long(UnpicklerObject *self)
2882{
2883 PyObject *value;
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00002884 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002885 Py_ssize_t len;
2886
2887 if ((len = unpickler_readline(self, &s)) < 0)
2888 return -1;
2889 if (len < 2)
2890 return bad_readline();
2891
Mark Dickinson8dd05142009-01-20 20:43:58 +00002892 /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
2893 the 'L' before calling PyLong_FromString. In order to maintain
2894 compatibility with Python 3.0.0, we don't actually *require*
2895 the 'L' to be present. */
2896 if (s[len-2] == 'L') {
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00002897 s[len-2] = '\0';
Mark Dickinson8dd05142009-01-20 20:43:58 +00002898 }
Alexandre Vassalottie4bccb72009-01-24 01:47:57 +00002899 /* XXX: Should the base argument explicitly set to 10? */
2900 value = PyLong_FromString(s, NULL, 0);
Mark Dickinson8dd05142009-01-20 20:43:58 +00002901 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002902 return -1;
2903
2904 PDATA_PUSH(self->stack, value, -1);
2905 return 0;
2906}
2907
2908/* 'size' bytes contain the # of bytes of little-endian 256's-complement
2909 * data following.
2910 */
2911static int
2912load_counted_long(UnpicklerObject *self, int size)
2913{
2914 PyObject *value;
2915 char *nbytes;
2916 char *pdata;
2917
2918 assert(size == 1 || size == 4);
2919 if (unpickler_read(self, &nbytes, size) < 0)
2920 return -1;
2921
2922 size = calc_binint(nbytes, size);
2923 if (size < 0) {
2924 /* Corrupt or hostile pickle -- we never write one like this */
2925 PyErr_SetString(UnpicklingError,
2926 "LONG pickle has negative byte count");
2927 return -1;
2928 }
2929
2930 if (size == 0)
2931 value = PyLong_FromLong(0L);
2932 else {
2933 /* Read the raw little-endian bytes and convert. */
2934 if (unpickler_read(self, &pdata, size) < 0)
2935 return -1;
2936 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
2937 1 /* little endian */ , 1 /* signed */ );
2938 }
2939 if (value == NULL)
2940 return -1;
2941 PDATA_PUSH(self->stack, value, -1);
2942 return 0;
2943}
2944
2945static int
2946load_float(UnpicklerObject *self)
2947{
2948 PyObject *value;
2949 char *endptr, *s;
2950 Py_ssize_t len;
2951 double d;
2952
2953 if ((len = unpickler_readline(self, &s)) < 0)
2954 return -1;
2955 if (len < 2)
2956 return bad_readline();
2957
2958 errno = 0;
2959 d = PyOS_ascii_strtod(s, &endptr);
2960
2961 if (errno || (endptr[0] != '\n') || (endptr[1] != '\0')) {
2962 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
2963 return -1;
2964 }
2965
2966 if ((value = PyFloat_FromDouble(d)) == NULL)
2967 return -1;
2968
2969 PDATA_PUSH(self->stack, value, -1);
2970 return 0;
2971}
2972
2973static int
2974load_binfloat(UnpicklerObject *self)
2975{
2976 PyObject *value;
2977 double x;
2978 char *s;
2979
2980 if (unpickler_read(self, &s, 8) < 0)
2981 return -1;
2982
2983 x = _PyFloat_Unpack8((unsigned char *)s, 0);
2984 if (x == -1.0 && PyErr_Occurred())
2985 return -1;
2986
2987 if ((value = PyFloat_FromDouble(x)) == NULL)
2988 return -1;
2989
2990 PDATA_PUSH(self->stack, value, -1);
2991 return 0;
2992}
2993
2994static int
2995load_string(UnpicklerObject *self)
2996{
2997 PyObject *bytes;
2998 PyObject *str = NULL;
2999 Py_ssize_t len;
3000 char *s, *p;
3001
3002 if ((len = unpickler_readline(self, &s)) < 0)
3003 return -1;
3004 if (len < 3)
3005 return bad_readline();
3006 if ((s = strdup(s)) == NULL) {
3007 PyErr_NoMemory();
3008 return -1;
3009 }
3010
3011 /* Strip outermost quotes */
3012 while (s[len - 1] <= ' ')
3013 len--;
3014 if (s[0] == '"' && s[len - 1] == '"') {
3015 s[len - 1] = '\0';
3016 p = s + 1;
3017 len -= 2;
3018 }
3019 else if (s[0] == '\'' && s[len - 1] == '\'') {
3020 s[len - 1] = '\0';
3021 p = s + 1;
3022 len -= 2;
3023 }
3024 else {
3025 free(s);
3026 PyErr_SetString(PyExc_ValueError, "insecure string pickle");
3027 return -1;
3028 }
3029
3030 /* Use the PyBytes API to decode the string, since that is what is used
3031 to encode, and then coerce the result to Unicode. */
3032 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
3033 free(s);
3034 if (bytes == NULL)
3035 return -1;
3036 str = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
3037 Py_DECREF(bytes);
3038 if (str == NULL)
3039 return -1;
3040
3041 PDATA_PUSH(self->stack, str, -1);
3042 return 0;
3043}
3044
3045static int
3046load_binbytes(UnpicklerObject *self)
3047{
3048 PyObject *bytes;
3049 long x;
3050 char *s;
3051
3052 if (unpickler_read(self, &s, 4) < 0)
3053 return -1;
3054
3055 x = calc_binint(s, 4);
3056 if (x < 0) {
3057 PyErr_SetString(UnpicklingError,
3058 "BINBYTES pickle has negative byte count");
3059 return -1;
3060 }
3061
3062 if (unpickler_read(self, &s, x) < 0)
3063 return -1;
3064 bytes = PyBytes_FromStringAndSize(s, x);
3065 if (bytes == NULL)
3066 return -1;
3067
3068 PDATA_PUSH(self->stack, bytes, -1);
3069 return 0;
3070}
3071
3072static int
3073load_short_binbytes(UnpicklerObject *self)
3074{
3075 PyObject *bytes;
3076 unsigned char x;
3077 char *s;
3078
3079 if (unpickler_read(self, &s, 1) < 0)
3080 return -1;
3081
3082 x = (unsigned char)s[0];
3083
3084 if (unpickler_read(self, &s, x) < 0)
3085 return -1;
3086
3087 bytes = PyBytes_FromStringAndSize(s, x);
3088 if (bytes == NULL)
3089 return -1;
3090
3091 PDATA_PUSH(self->stack, bytes, -1);
3092 return 0;
3093}
3094
3095static int
3096load_binstring(UnpicklerObject *self)
3097{
3098 PyObject *str;
3099 long x;
3100 char *s;
3101
3102 if (unpickler_read(self, &s, 4) < 0)
3103 return -1;
3104
3105 x = calc_binint(s, 4);
3106 if (x < 0) {
3107 PyErr_SetString(UnpicklingError,
3108 "BINSTRING pickle has negative byte count");
3109 return -1;
3110 }
3111
3112 if (unpickler_read(self, &s, x) < 0)
3113 return -1;
3114
3115 /* Convert Python 2.x strings to unicode. */
3116 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
3117 if (str == NULL)
3118 return -1;
3119
3120 PDATA_PUSH(self->stack, str, -1);
3121 return 0;
3122}
3123
3124static int
3125load_short_binstring(UnpicklerObject *self)
3126{
3127 PyObject *str;
3128 unsigned char x;
3129 char *s;
3130
3131 if (unpickler_read(self, &s, 1) < 0)
3132 return -1;
3133
3134 x = (unsigned char)s[0];
3135
3136 if (unpickler_read(self, &s, x) < 0)
3137 return -1;
3138
3139 /* Convert Python 2.x strings to unicode. */
3140 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
3141 if (str == NULL)
3142 return -1;
3143
3144 PDATA_PUSH(self->stack, str, -1);
3145 return 0;
3146}
3147
3148static int
3149load_unicode(UnpicklerObject *self)
3150{
3151 PyObject *str;
3152 Py_ssize_t len;
3153 char *s;
3154
3155 if ((len = unpickler_readline(self, &s)) < 0)
3156 return -1;
3157 if (len < 1)
3158 return bad_readline();
3159
3160 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
3161 if (str == NULL)
3162 return -1;
3163
3164 PDATA_PUSH(self->stack, str, -1);
3165 return 0;
3166}
3167
3168static int
3169load_binunicode(UnpicklerObject *self)
3170{
3171 PyObject *str;
3172 long size;
3173 char *s;
3174
3175 if (unpickler_read(self, &s, 4) < 0)
3176 return -1;
3177
3178 size = calc_binint(s, 4);
3179 if (size < 0) {
3180 PyErr_SetString(UnpicklingError,
3181 "BINUNICODE pickle has negative byte count");
3182 return -1;
3183 }
3184
3185 if (unpickler_read(self, &s, size) < 0)
3186 return -1;
3187
3188 str = PyUnicode_DecodeUTF8(s, size, NULL);
3189 if (str == NULL)
3190 return -1;
3191
3192 PDATA_PUSH(self->stack, str, -1);
3193 return 0;
3194}
3195
3196static int
3197load_tuple(UnpicklerObject *self)
3198{
3199 PyObject *tuple;
3200 int i;
3201
3202 if ((i = marker(self)) < 0)
3203 return -1;
3204
3205 tuple = Pdata_poptuple(self->stack, i);
3206 if (tuple == NULL)
3207 return -1;
3208 PDATA_PUSH(self->stack, tuple, -1);
3209 return 0;
3210}
3211
3212static int
3213load_counted_tuple(UnpicklerObject *self, int len)
3214{
3215 PyObject *tuple;
3216
3217 tuple = PyTuple_New(len);
3218 if (tuple == NULL)
3219 return -1;
3220
3221 while (--len >= 0) {
3222 PyObject *item;
3223
3224 PDATA_POP(self->stack, item);
3225 if (item == NULL)
3226 return -1;
3227 PyTuple_SET_ITEM(tuple, len, item);
3228 }
3229 PDATA_PUSH(self->stack, tuple, -1);
3230 return 0;
3231}
3232
3233static int
3234load_empty_list(UnpicklerObject *self)
3235{
3236 PyObject *list;
3237
3238 if ((list = PyList_New(0)) == NULL)
3239 return -1;
3240 PDATA_PUSH(self->stack, list, -1);
3241 return 0;
3242}
3243
3244static int
3245load_empty_dict(UnpicklerObject *self)
3246{
3247 PyObject *dict;
3248
3249 if ((dict = PyDict_New()) == NULL)
3250 return -1;
3251 PDATA_PUSH(self->stack, dict, -1);
3252 return 0;
3253}
3254
3255static int
3256load_list(UnpicklerObject *self)
3257{
3258 PyObject *list;
3259 int i;
3260
3261 if ((i = marker(self)) < 0)
3262 return -1;
3263
3264 list = Pdata_poplist(self->stack, i);
3265 if (list == NULL)
3266 return -1;
3267 PDATA_PUSH(self->stack, list, -1);
3268 return 0;
3269}
3270
3271static int
3272load_dict(UnpicklerObject *self)
3273{
3274 PyObject *dict, *key, *value;
3275 int i, j, k;
3276
3277 if ((i = marker(self)) < 0)
3278 return -1;
3279 j = self->stack->length;
3280
3281 if ((dict = PyDict_New()) == NULL)
3282 return -1;
3283
3284 for (k = i + 1; k < j; k += 2) {
3285 key = self->stack->data[k - 1];
3286 value = self->stack->data[k];
3287 if (PyDict_SetItem(dict, key, value) < 0) {
3288 Py_DECREF(dict);
3289 return -1;
3290 }
3291 }
3292 Pdata_clear(self->stack, i);
3293 PDATA_PUSH(self->stack, dict, -1);
3294 return 0;
3295}
3296
3297static PyObject *
3298instantiate(PyObject *cls, PyObject *args)
3299{
3300 PyObject *r = NULL;
3301
3302 /* XXX: The pickle.py module does not create instances this way when the
3303 args tuple is empty. See Unpickler._instantiate(). */
3304 if ((r = PyObject_CallObject(cls, args)))
3305 return r;
3306
3307 /* XXX: Is this still nescessary? */
3308 {
3309 PyObject *tp, *v, *tb, *tmp_value;
3310
3311 PyErr_Fetch(&tp, &v, &tb);
3312 tmp_value = v;
3313 /* NULL occurs when there was a KeyboardInterrupt */
3314 if (tmp_value == NULL)
3315 tmp_value = Py_None;
3316 if ((r = PyTuple_Pack(3, tmp_value, cls, args))) {
3317 Py_XDECREF(v);
3318 v = r;
3319 }
3320 PyErr_Restore(tp, v, tb);
3321 }
3322 return NULL;
3323}
3324
3325static int
3326load_obj(UnpicklerObject *self)
3327{
3328 PyObject *cls, *args, *obj = NULL;
3329 int i;
3330
3331 if ((i = marker(self)) < 0)
3332 return -1;
3333
3334 args = Pdata_poptuple(self->stack, i + 1);
3335 if (args == NULL)
3336 return -1;
3337
3338 PDATA_POP(self->stack, cls);
3339 if (cls) {
3340 obj = instantiate(cls, args);
3341 Py_DECREF(cls);
3342 }
3343 Py_DECREF(args);
3344 if (obj == NULL)
3345 return -1;
3346
3347 PDATA_PUSH(self->stack, obj, -1);
3348 return 0;
3349}
3350
3351static int
3352load_inst(UnpicklerObject *self)
3353{
3354 PyObject *cls = NULL;
3355 PyObject *args = NULL;
3356 PyObject *obj = NULL;
3357 PyObject *module_name;
3358 PyObject *class_name;
3359 Py_ssize_t len;
3360 int i;
3361 char *s;
3362
3363 if ((i = marker(self)) < 0)
3364 return -1;
3365 if ((len = unpickler_readline(self, &s)) < 0)
3366 return -1;
3367 if (len < 2)
3368 return bad_readline();
3369
3370 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
3371 identifiers are permitted in Python 3.0, since the INST opcode is only
3372 supported by older protocols on Python 2.x. */
3373 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
3374 if (module_name == NULL)
3375 return -1;
3376
3377 if ((len = unpickler_readline(self, &s)) >= 0) {
3378 if (len < 2)
3379 return bad_readline();
3380 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
3381 if (class_name == NULL) {
3382 cls = find_class(self, module_name, class_name);
3383 Py_DECREF(class_name);
3384 }
3385 }
3386 Py_DECREF(module_name);
3387
3388 if (cls == NULL)
3389 return -1;
3390
3391 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
3392 obj = instantiate(cls, args);
3393 Py_DECREF(args);
3394 }
3395 Py_DECREF(cls);
3396
3397 if (obj == NULL)
3398 return -1;
3399
3400 PDATA_PUSH(self->stack, obj, -1);
3401 return 0;
3402}
3403
3404static int
3405load_newobj(UnpicklerObject *self)
3406{
3407 PyObject *args = NULL;
3408 PyObject *clsraw = NULL;
3409 PyTypeObject *cls; /* clsraw cast to its true type */
3410 PyObject *obj;
3411
3412 /* Stack is ... cls argtuple, and we want to call
3413 * cls.__new__(cls, *argtuple).
3414 */
3415 PDATA_POP(self->stack, args);
3416 if (args == NULL)
3417 goto error;
3418 if (!PyTuple_Check(args)) {
3419 PyErr_SetString(UnpicklingError, "NEWOBJ expected an arg " "tuple.");
3420 goto error;
3421 }
3422
3423 PDATA_POP(self->stack, clsraw);
3424 cls = (PyTypeObject *)clsraw;
3425 if (cls == NULL)
3426 goto error;
3427 if (!PyType_Check(cls)) {
3428 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
3429 "isn't a type object");
3430 goto error;
3431 }
3432 if (cls->tp_new == NULL) {
3433 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
3434 "has NULL tp_new");
3435 goto error;
3436 }
3437
3438 /* Call __new__. */
3439 obj = cls->tp_new(cls, args, NULL);
3440 if (obj == NULL)
3441 goto error;
3442
3443 Py_DECREF(args);
3444 Py_DECREF(clsraw);
3445 PDATA_PUSH(self->stack, obj, -1);
3446 return 0;
3447
3448 error:
3449 Py_XDECREF(args);
3450 Py_XDECREF(clsraw);
3451 return -1;
3452}
3453
3454static int
3455load_global(UnpicklerObject *self)
3456{
3457 PyObject *global = NULL;
3458 PyObject *module_name;
3459 PyObject *global_name;
3460 Py_ssize_t len;
3461 char *s;
3462
3463 if ((len = unpickler_readline(self, &s)) < 0)
3464 return -1;
3465 if (len < 2)
3466 return bad_readline();
3467 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
3468 if (!module_name)
3469 return -1;
3470
3471 if ((len = unpickler_readline(self, &s)) >= 0) {
3472 if (len < 2) {
3473 Py_DECREF(module_name);
3474 return bad_readline();
3475 }
3476 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
3477 if (global_name) {
3478 global = find_class(self, module_name, global_name);
3479 Py_DECREF(global_name);
3480 }
3481 }
3482 Py_DECREF(module_name);
3483
3484 if (global == NULL)
3485 return -1;
3486 PDATA_PUSH(self->stack, global, -1);
3487 return 0;
3488}
3489
3490static int
3491load_persid(UnpicklerObject *self)
3492{
3493 PyObject *pid;
3494 Py_ssize_t len;
3495 char *s;
3496
3497 if (self->pers_func) {
3498 if ((len = unpickler_readline(self, &s)) < 0)
3499 return -1;
3500 if (len < 2)
3501 return bad_readline();
3502
3503 pid = PyBytes_FromStringAndSize(s, len - 1);
3504 if (pid == NULL)
3505 return -1;
3506
3507 /* Ugh... this does not leak since unpickler_call() steals the
3508 reference to pid first. */
3509 pid = unpickler_call(self, self->pers_func, pid);
3510 if (pid == NULL)
3511 return -1;
3512
3513 PDATA_PUSH(self->stack, pid, -1);
3514 return 0;
3515 }
3516 else {
3517 PyErr_SetString(UnpicklingError,
3518 "A load persistent id instruction was encountered,\n"
3519 "but no persistent_load function was specified.");
3520 return -1;
3521 }
3522}
3523
3524static int
3525load_binpersid(UnpicklerObject *self)
3526{
3527 PyObject *pid;
3528
3529 if (self->pers_func) {
3530 PDATA_POP(self->stack, pid);
3531 if (pid == NULL)
3532 return -1;
3533
3534 /* Ugh... this does not leak since unpickler_call() steals the
3535 reference to pid first. */
3536 pid = unpickler_call(self, self->pers_func, pid);
3537 if (pid == NULL)
3538 return -1;
3539
3540 PDATA_PUSH(self->stack, pid, -1);
3541 return 0;
3542 }
3543 else {
3544 PyErr_SetString(UnpicklingError,
3545 "A load persistent id instruction was encountered,\n"
3546 "but no persistent_load function was specified.");
3547 return -1;
3548 }
3549}
3550
3551static int
3552load_pop(UnpicklerObject *self)
3553{
3554 int len;
3555
3556 if ((len = self->stack->length) <= 0)
3557 return stack_underflow();
3558
3559 /* Note that we split the (pickle.py) stack into two stacks,
3560 * an object stack and a mark stack. We have to be clever and
3561 * pop the right one. We do this by looking at the top of the
3562 * mark stack.
3563 */
3564
3565 if ((self->num_marks > 0) && (self->marks[self->num_marks - 1] == len))
3566 self->num_marks--;
3567 else {
3568 len--;
3569 Py_DECREF(self->stack->data[len]);
3570 self->stack->length = len;
3571 }
3572
3573 return 0;
3574}
3575
3576static int
3577load_pop_mark(UnpicklerObject *self)
3578{
3579 int i;
3580
3581 if ((i = marker(self)) < 0)
3582 return -1;
3583
3584 Pdata_clear(self->stack, i);
3585
3586 return 0;
3587}
3588
3589static int
3590load_dup(UnpicklerObject *self)
3591{
3592 PyObject *last;
3593 int len;
3594
3595 if ((len = self->stack->length) <= 0)
3596 return stack_underflow();
3597 last = self->stack->data[len - 1];
3598 PDATA_APPEND(self->stack, last, -1);
3599 return 0;
3600}
3601
3602static int
3603load_get(UnpicklerObject *self)
3604{
3605 PyObject *key, *value;
3606 Py_ssize_t len;
3607 char *s;
3608
3609 if ((len = unpickler_readline(self, &s)) < 0)
3610 return -1;
3611 if (len < 2)
3612 return bad_readline();
3613
3614 key = PyLong_FromString(s, NULL, 10);
3615 if (key == NULL)
3616 return -1;
3617
3618 value = PyDict_GetItemWithError(self->memo, key);
3619 if (value == NULL) {
3620 if (!PyErr_Occurred())
3621 PyErr_SetObject(PyExc_KeyError, key);
3622 Py_DECREF(key);
3623 return -1;
3624 }
3625 Py_DECREF(key);
3626
3627 PDATA_APPEND(self->stack, value, -1);
3628 return 0;
3629}
3630
3631static int
3632load_binget(UnpicklerObject *self)
3633{
3634 PyObject *key, *value;
3635 char *s;
3636
3637 if (unpickler_read(self, &s, 1) < 0)
3638 return -1;
3639
3640 /* Here, the unsigned cast is necessary to avoid negative values. */
3641 key = PyLong_FromLong((long)(unsigned char)s[0]);
3642 if (key == NULL)
3643 return -1;
3644
3645 value = PyDict_GetItemWithError(self->memo, key);
3646 if (value == NULL) {
3647 if (!PyErr_Occurred())
3648 PyErr_SetObject(PyExc_KeyError, key);
3649 Py_DECREF(key);
3650 return -1;
3651 }
3652 Py_DECREF(key);
3653
3654 PDATA_APPEND(self->stack, value, -1);
3655 return 0;
3656}
3657
3658static int
3659load_long_binget(UnpicklerObject *self)
3660{
3661 PyObject *key, *value;
3662 char *s;
3663 long k;
3664
3665 if (unpickler_read(self, &s, 4) < 0)
3666 return -1;
3667
3668 k = (long)(unsigned char)s[0];
3669 k |= (long)(unsigned char)s[1] << 8;
3670 k |= (long)(unsigned char)s[2] << 16;
3671 k |= (long)(unsigned char)s[3] << 24;
3672
3673 key = PyLong_FromLong(k);
3674 if (key == NULL)
3675 return -1;
3676
3677 value = PyDict_GetItemWithError(self->memo, key);
3678 if (value == NULL) {
3679 if (!PyErr_Occurred())
3680 PyErr_SetObject(PyExc_KeyError, key);
3681 Py_DECREF(key);
3682 return -1;
3683 }
3684 Py_DECREF(key);
3685
3686 PDATA_APPEND(self->stack, value, -1);
3687 return 0;
3688}
3689
3690/* Push an object from the extension registry (EXT[124]). nbytes is
3691 * the number of bytes following the opcode, holding the index (code) value.
3692 */
3693static int
3694load_extension(UnpicklerObject *self, int nbytes)
3695{
3696 char *codebytes; /* the nbytes bytes after the opcode */
3697 long code; /* calc_binint returns long */
3698 PyObject *py_code; /* code as a Python int */
3699 PyObject *obj; /* the object to push */
3700 PyObject *pair; /* (module_name, class_name) */
3701 PyObject *module_name, *class_name;
3702
3703 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
3704 if (unpickler_read(self, &codebytes, nbytes) < 0)
3705 return -1;
3706 code = calc_binint(codebytes, nbytes);
3707 if (code <= 0) { /* note that 0 is forbidden */
3708 /* Corrupt or hostile pickle. */
3709 PyErr_SetString(UnpicklingError, "EXT specifies code <= 0");
3710 return -1;
3711 }
3712
3713 /* Look for the code in the cache. */
3714 py_code = PyLong_FromLong(code);
3715 if (py_code == NULL)
3716 return -1;
3717 obj = PyDict_GetItem(extension_cache, py_code);
3718 if (obj != NULL) {
3719 /* Bingo. */
3720 Py_DECREF(py_code);
3721 PDATA_APPEND(self->stack, obj, -1);
3722 return 0;
3723 }
3724
3725 /* Look up the (module_name, class_name) pair. */
3726 pair = PyDict_GetItem(inverted_registry, py_code);
3727 if (pair == NULL) {
3728 Py_DECREF(py_code);
3729 PyErr_Format(PyExc_ValueError, "unregistered extension "
3730 "code %ld", code);
3731 return -1;
3732 }
3733 /* Since the extension registry is manipulable via Python code,
3734 * confirm that pair is really a 2-tuple of strings.
3735 */
3736 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2 ||
3737 !PyUnicode_Check(module_name = PyTuple_GET_ITEM(pair, 0)) ||
3738 !PyUnicode_Check(class_name = PyTuple_GET_ITEM(pair, 1))) {
3739 Py_DECREF(py_code);
3740 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
3741 "isn't a 2-tuple of strings", code);
3742 return -1;
3743 }
3744 /* Load the object. */
3745 obj = find_class(self, module_name, class_name);
3746 if (obj == NULL) {
3747 Py_DECREF(py_code);
3748 return -1;
3749 }
3750 /* Cache code -> obj. */
3751 code = PyDict_SetItem(extension_cache, py_code, obj);
3752 Py_DECREF(py_code);
3753 if (code < 0) {
3754 Py_DECREF(obj);
3755 return -1;
3756 }
3757 PDATA_PUSH(self->stack, obj, -1);
3758 return 0;
3759}
3760
3761static int
3762load_put(UnpicklerObject *self)
3763{
3764 PyObject *key, *value;
3765 Py_ssize_t len;
3766 char *s;
3767 int x;
3768
3769 if ((len = unpickler_readline(self, &s)) < 0)
3770 return -1;
3771 if (len < 2)
3772 return bad_readline();
3773 if ((x = self->stack->length) <= 0)
3774 return stack_underflow();
3775
3776 key = PyLong_FromString(s, NULL, 10);
3777 if (key == NULL)
3778 return -1;
3779 value = self->stack->data[x - 1];
3780
3781 x = PyDict_SetItem(self->memo, key, value);
3782 Py_DECREF(key);
3783 return x;
3784}
3785
3786static int
3787load_binput(UnpicklerObject *self)
3788{
3789 PyObject *key, *value;
3790 char *s;
3791 int x;
3792
3793 if (unpickler_read(self, &s, 1) < 0)
3794 return -1;
3795 if ((x = self->stack->length) <= 0)
3796 return stack_underflow();
3797
3798 key = PyLong_FromLong((long)(unsigned char)s[0]);
3799 if (key == NULL)
3800 return -1;
3801 value = self->stack->data[x - 1];
3802
3803 x = PyDict_SetItem(self->memo, key, value);
3804 Py_DECREF(key);
3805 return x;
3806}
3807
3808static int
3809load_long_binput(UnpicklerObject *self)
3810{
3811 PyObject *key, *value;
3812 long k;
3813 char *s;
3814 int x;
3815
3816 if (unpickler_read(self, &s, 4) < 0)
3817 return -1;
3818 if ((x = self->stack->length) <= 0)
3819 return stack_underflow();
3820
3821 k = (long)(unsigned char)s[0];
3822 k |= (long)(unsigned char)s[1] << 8;
3823 k |= (long)(unsigned char)s[2] << 16;
3824 k |= (long)(unsigned char)s[3] << 24;
3825
3826 key = PyLong_FromLong(k);
3827 if (key == NULL)
3828 return -1;
3829 value = self->stack->data[x - 1];
3830
3831 x = PyDict_SetItem(self->memo, key, value);
3832 Py_DECREF(key);
3833 return x;
3834}
3835
3836static int
3837do_append(UnpicklerObject *self, int x)
3838{
3839 PyObject *value;
3840 PyObject *list;
3841 int len, i;
3842
3843 len = self->stack->length;
3844 if (x > len || x <= 0)
3845 return stack_underflow();
3846 if (len == x) /* nothing to do */
3847 return 0;
3848
3849 list = self->stack->data[x - 1];
3850
3851 if (PyList_Check(list)) {
3852 PyObject *slice;
3853 Py_ssize_t list_len;
3854
3855 slice = Pdata_poplist(self->stack, x);
3856 if (!slice)
3857 return -1;
3858 list_len = PyList_GET_SIZE(list);
3859 i = PyList_SetSlice(list, list_len, list_len, slice);
3860 Py_DECREF(slice);
3861 return i;
3862 }
3863 else {
3864 PyObject *append_func;
3865
3866 append_func = PyObject_GetAttrString(list, "append");
3867 if (append_func == NULL)
3868 return -1;
3869 for (i = x; i < len; i++) {
3870 PyObject *result;
3871
3872 value = self->stack->data[i];
3873 result = unpickler_call(self, append_func, value);
3874 if (result == NULL) {
3875 Pdata_clear(self->stack, i + 1);
3876 self->stack->length = x;
3877 return -1;
3878 }
3879 Py_DECREF(result);
3880 }
3881 self->stack->length = x;
3882 }
3883
3884 return 0;
3885}
3886
3887static int
3888load_append(UnpicklerObject *self)
3889{
3890 return do_append(self, self->stack->length - 1);
3891}
3892
3893static int
3894load_appends(UnpicklerObject *self)
3895{
3896 return do_append(self, marker(self));
3897}
3898
3899static int
3900do_setitems(UnpicklerObject *self, int x)
3901{
3902 PyObject *value, *key;
3903 PyObject *dict;
3904 int len, i;
3905 int status = 0;
3906
3907 len = self->stack->length;
3908 if (x > len || x <= 0)
3909 return stack_underflow();
3910 if (len == x) /* nothing to do */
3911 return 0;
3912 if ((len - x) % 2 != 0) {
3913 /* Currupt or hostile pickle -- we never write one like this. */
3914 PyErr_SetString(UnpicklingError, "odd number of items for SETITEMS");
3915 return -1;
3916 }
3917
3918 /* Here, dict does not actually need to be a PyDict; it could be anything
3919 that supports the __setitem__ attribute. */
3920 dict = self->stack->data[x - 1];
3921
3922 for (i = x + 1; i < len; i += 2) {
3923 key = self->stack->data[i - 1];
3924 value = self->stack->data[i];
3925 if (PyObject_SetItem(dict, key, value) < 0) {
3926 status = -1;
3927 break;
3928 }
3929 }
3930
3931 Pdata_clear(self->stack, x);
3932 return status;
3933}
3934
3935static int
3936load_setitem(UnpicklerObject *self)
3937{
3938 return do_setitems(self, self->stack->length - 2);
3939}
3940
3941static int
3942load_setitems(UnpicklerObject *self)
3943{
3944 return do_setitems(self, marker(self));
3945}
3946
3947static int
3948load_build(UnpicklerObject *self)
3949{
3950 PyObject *state, *inst, *slotstate;
3951 PyObject *setstate;
3952 int status = 0;
3953
3954 /* Stack is ... instance, state. We want to leave instance at
3955 * the stack top, possibly mutated via instance.__setstate__(state).
3956 */
3957 if (self->stack->length < 2)
3958 return stack_underflow();
3959
3960 PDATA_POP(self->stack, state);
3961 if (state == NULL)
3962 return -1;
3963
3964 inst = self->stack->data[self->stack->length - 1];
3965
3966 setstate = PyObject_GetAttrString(inst, "__setstate__");
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00003967 if (setstate == NULL) {
3968 if (PyErr_ExceptionMatches(PyExc_AttributeError))
3969 PyErr_Clear();
Antoine Pitroud79dc622008-09-05 00:03:33 +00003970 else {
3971 Py_DECREF(state);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00003972 return -1;
Antoine Pitroud79dc622008-09-05 00:03:33 +00003973 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003974 }
3975 else {
3976 PyObject *result;
3977
3978 /* The explicit __setstate__ is responsible for everything. */
Antoine Pitroud79dc622008-09-05 00:03:33 +00003979 /* Ugh... this does not leak since unpickler_call() steals the
3980 reference to state first. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003981 result = unpickler_call(self, setstate, state);
3982 Py_DECREF(setstate);
3983 if (result == NULL)
3984 return -1;
3985 Py_DECREF(result);
3986 return 0;
3987 }
3988
3989 /* A default __setstate__. First see whether state embeds a
3990 * slot state dict too (a proto 2 addition).
3991 */
3992 if (PyTuple_Check(state) && Py_SIZE(state) == 2) {
3993 PyObject *tmp = state;
3994
3995 state = PyTuple_GET_ITEM(tmp, 0);
3996 slotstate = PyTuple_GET_ITEM(tmp, 1);
3997 Py_INCREF(state);
3998 Py_INCREF(slotstate);
3999 Py_DECREF(tmp);
4000 }
4001 else
4002 slotstate = NULL;
4003
4004 /* Set inst.__dict__ from the state dict (if any). */
4005 if (state != Py_None) {
4006 PyObject *dict;
4007
4008 if (!PyDict_Check(state)) {
4009 PyErr_SetString(UnpicklingError, "state is not a dictionary");
4010 goto error;
4011 }
4012 dict = PyObject_GetAttrString(inst, "__dict__");
4013 if (dict == NULL)
4014 goto error;
4015
4016 PyDict_Update(dict, state);
4017 Py_DECREF(dict);
4018 }
4019
4020 /* Also set instance attributes from the slotstate dict (if any). */
4021 if (slotstate != NULL) {
4022 PyObject *d_key, *d_value;
4023 Py_ssize_t i;
4024
4025 if (!PyDict_Check(slotstate)) {
4026 PyErr_SetString(UnpicklingError,
4027 "slot state is not a dictionary");
4028 goto error;
4029 }
4030 i = 0;
4031 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
4032 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
4033 goto error;
4034 }
4035 }
4036
4037 if (0) {
4038 error:
4039 status = -1;
4040 }
4041
4042 Py_DECREF(state);
4043 Py_XDECREF(slotstate);
4044 return status;
4045}
4046
4047static int
4048load_mark(UnpicklerObject *self)
4049{
4050
4051 /* Note that we split the (pickle.py) stack into two stacks, an
4052 * object stack and a mark stack. Here we push a mark onto the
4053 * mark stack.
4054 */
4055
4056 if ((self->num_marks + 1) >= self->marks_size) {
4057 size_t alloc;
4058 int *marks;
4059
4060 /* Use the size_t type to check for overflow. */
4061 alloc = ((size_t)self->num_marks << 1) + 20;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00004062 if (alloc > PY_SSIZE_T_MAX ||
4063 alloc <= ((size_t)self->num_marks + 1)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004064 PyErr_NoMemory();
4065 return -1;
4066 }
4067
4068 if (self->marks == NULL)
4069 marks = (int *)PyMem_Malloc(alloc * sizeof(int));
4070 else
4071 marks = (int *)PyMem_Realloc(self->marks, alloc * sizeof(int));
4072 if (marks == NULL) {
4073 PyErr_NoMemory();
4074 return -1;
4075 }
4076 self->marks = marks;
4077 self->marks_size = (Py_ssize_t)alloc;
4078 }
4079
4080 self->marks[self->num_marks++] = self->stack->length;
4081
4082 return 0;
4083}
4084
4085static int
4086load_reduce(UnpicklerObject *self)
4087{
4088 PyObject *callable = NULL;
4089 PyObject *argtup = NULL;
4090 PyObject *obj = NULL;
4091
4092 PDATA_POP(self->stack, argtup);
4093 if (argtup == NULL)
4094 return -1;
4095 PDATA_POP(self->stack, callable);
4096 if (callable) {
4097 obj = instantiate(callable, argtup);
4098 Py_DECREF(callable);
4099 }
4100 Py_DECREF(argtup);
4101
4102 if (obj == NULL)
4103 return -1;
4104
4105 PDATA_PUSH(self->stack, obj, -1);
4106 return 0;
4107}
4108
4109/* Just raises an error if we don't know the protocol specified. PROTO
4110 * is the first opcode for protocols >= 2.
4111 */
4112static int
4113load_proto(UnpicklerObject *self)
4114{
4115 char *s;
4116 int i;
4117
4118 if (unpickler_read(self, &s, 1) < 0)
4119 return -1;
4120
4121 i = (unsigned char)s[0];
4122 if (i <= HIGHEST_PROTOCOL)
4123 return 0;
4124
4125 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
4126 return -1;
4127}
4128
4129static PyObject *
4130load(UnpicklerObject *self)
4131{
4132 PyObject *err;
4133 PyObject *value = NULL;
4134 char *s;
4135
4136 self->num_marks = 0;
4137 if (self->stack->length)
4138 Pdata_clear(self->stack, 0);
4139
4140 /* Convenient macros for the dispatch while-switch loop just below. */
4141#define OP(opcode, load_func) \
4142 case opcode: if (load_func(self) < 0) break; continue;
4143
4144#define OP_ARG(opcode, load_func, arg) \
4145 case opcode: if (load_func(self, (arg)) < 0) break; continue;
4146
4147 while (1) {
4148 if (unpickler_read(self, &s, 1) < 0)
4149 break;
4150
4151 switch ((enum opcode)s[0]) {
4152 OP(NONE, load_none)
4153 OP(BININT, load_binint)
4154 OP(BININT1, load_binint1)
4155 OP(BININT2, load_binint2)
4156 OP(INT, load_int)
4157 OP(LONG, load_long)
4158 OP_ARG(LONG1, load_counted_long, 1)
4159 OP_ARG(LONG4, load_counted_long, 4)
4160 OP(FLOAT, load_float)
4161 OP(BINFLOAT, load_binfloat)
4162 OP(BINBYTES, load_binbytes)
4163 OP(SHORT_BINBYTES, load_short_binbytes)
4164 OP(BINSTRING, load_binstring)
4165 OP(SHORT_BINSTRING, load_short_binstring)
4166 OP(STRING, load_string)
4167 OP(UNICODE, load_unicode)
4168 OP(BINUNICODE, load_binunicode)
4169 OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
4170 OP_ARG(TUPLE1, load_counted_tuple, 1)
4171 OP_ARG(TUPLE2, load_counted_tuple, 2)
4172 OP_ARG(TUPLE3, load_counted_tuple, 3)
4173 OP(TUPLE, load_tuple)
4174 OP(EMPTY_LIST, load_empty_list)
4175 OP(LIST, load_list)
4176 OP(EMPTY_DICT, load_empty_dict)
4177 OP(DICT, load_dict)
4178 OP(OBJ, load_obj)
4179 OP(INST, load_inst)
4180 OP(NEWOBJ, load_newobj)
4181 OP(GLOBAL, load_global)
4182 OP(APPEND, load_append)
4183 OP(APPENDS, load_appends)
4184 OP(BUILD, load_build)
4185 OP(DUP, load_dup)
4186 OP(BINGET, load_binget)
4187 OP(LONG_BINGET, load_long_binget)
4188 OP(GET, load_get)
4189 OP(MARK, load_mark)
4190 OP(BINPUT, load_binput)
4191 OP(LONG_BINPUT, load_long_binput)
4192 OP(PUT, load_put)
4193 OP(POP, load_pop)
4194 OP(POP_MARK, load_pop_mark)
4195 OP(SETITEM, load_setitem)
4196 OP(SETITEMS, load_setitems)
4197 OP(PERSID, load_persid)
4198 OP(BINPERSID, load_binpersid)
4199 OP(REDUCE, load_reduce)
4200 OP(PROTO, load_proto)
4201 OP_ARG(EXT1, load_extension, 1)
4202 OP_ARG(EXT2, load_extension, 2)
4203 OP_ARG(EXT4, load_extension, 4)
4204 OP_ARG(NEWTRUE, load_bool, Py_True)
4205 OP_ARG(NEWFALSE, load_bool, Py_False)
4206
4207 case STOP:
4208 break;
4209
4210 case '\0':
4211 PyErr_SetNone(PyExc_EOFError);
4212 return NULL;
4213
4214 default:
4215 PyErr_Format(UnpicklingError,
4216 "invalid load key, '%c'.", s[0]);
4217 return NULL;
4218 }
4219
4220 break; /* and we are done! */
4221 }
4222
4223 /* XXX: It is not clear what this is actually for. */
4224 if ((err = PyErr_Occurred())) {
4225 if (err == PyExc_EOFError) {
4226 PyErr_SetNone(PyExc_EOFError);
4227 }
4228 return NULL;
4229 }
4230
4231 PDATA_POP(self->stack, value);
4232 return value;
4233}
4234
4235PyDoc_STRVAR(Unpickler_load_doc,
4236"load() -> object. Load a pickle."
4237"\n"
4238"Read a pickled object representation from the open file object given in\n"
4239"the constructor, and return the reconstituted object hierarchy specified\n"
4240"therein.\n");
4241
4242static PyObject *
4243Unpickler_load(UnpicklerObject *self)
4244{
4245 /* Check whether the Unpickler was initialized correctly. This prevents
4246 segfaulting if a subclass overridden __init__ with a function that does
4247 not call Unpickler.__init__(). Here, we simply ensure that self->read
4248 is not NULL. */
4249 if (self->read == NULL) {
4250 PyErr_Format(UnpicklingError,
4251 "Unpickler.__init__() was not called by %s.__init__()",
4252 Py_TYPE(self)->tp_name);
4253 return NULL;
4254 }
4255
4256 return load(self);
4257}
4258
4259/* The name of find_class() is misleading. In newer pickle protocols, this
4260 function is used for loading any global (i.e., functions), not just
4261 classes. The name is kept only for backward compatibility. */
4262
4263PyDoc_STRVAR(Unpickler_find_class_doc,
4264"find_class(module_name, global_name) -> object.\n"
4265"\n"
4266"Return an object from a specified module, importing the module if\n"
4267"necessary. Subclasses may override this method (e.g. to restrict\n"
4268"unpickling of arbitrary classes and functions).\n"
4269"\n"
4270"This method is called whenever a class or a function object is\n"
4271"needed. Both arguments passed are str objects.\n");
4272
4273static PyObject *
4274Unpickler_find_class(UnpicklerObject *self, PyObject *args)
4275{
4276 PyObject *global;
4277 PyObject *modules_dict;
4278 PyObject *module;
4279 PyObject *module_name, *global_name;
4280
4281 if (!PyArg_UnpackTuple(args, "find_class", 2, 2,
4282 &module_name, &global_name))
4283 return NULL;
4284
4285 modules_dict = PySys_GetObject("modules");
4286 if (modules_dict == NULL)
4287 return NULL;
4288
4289 module = PyDict_GetItem(modules_dict, module_name);
4290 if (module == NULL) {
4291 module = PyImport_Import(module_name);
4292 if (module == NULL)
4293 return NULL;
4294 global = PyObject_GetAttr(module, global_name);
4295 Py_DECREF(module);
4296 }
4297 else {
4298 global = PyObject_GetAttr(module, global_name);
4299 }
4300 return global;
4301}
4302
4303static struct PyMethodDef Unpickler_methods[] = {
4304 {"load", (PyCFunction)Unpickler_load, METH_NOARGS,
4305 Unpickler_load_doc},
4306 {"find_class", (PyCFunction)Unpickler_find_class, METH_VARARGS,
4307 Unpickler_find_class_doc},
4308 {NULL, NULL} /* sentinel */
4309};
4310
4311static void
4312Unpickler_dealloc(UnpicklerObject *self)
4313{
4314 PyObject_GC_UnTrack((PyObject *)self);
4315 Py_XDECREF(self->readline);
4316 Py_XDECREF(self->read);
4317 Py_XDECREF(self->memo);
4318 Py_XDECREF(self->stack);
4319 Py_XDECREF(self->pers_func);
4320 Py_XDECREF(self->arg);
4321 Py_XDECREF(self->last_string);
4322
4323 PyMem_Free(self->marks);
4324 free(self->encoding);
4325 free(self->errors);
4326
4327 Py_TYPE(self)->tp_free((PyObject *)self);
4328}
4329
4330static int
4331Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
4332{
4333 Py_VISIT(self->readline);
4334 Py_VISIT(self->read);
4335 Py_VISIT(self->memo);
4336 Py_VISIT(self->stack);
4337 Py_VISIT(self->pers_func);
4338 Py_VISIT(self->arg);
4339 Py_VISIT(self->last_string);
4340 return 0;
4341}
4342
4343static int
4344Unpickler_clear(UnpicklerObject *self)
4345{
4346 Py_CLEAR(self->readline);
4347 Py_CLEAR(self->read);
4348 Py_CLEAR(self->memo);
4349 Py_CLEAR(self->stack);
4350 Py_CLEAR(self->pers_func);
4351 Py_CLEAR(self->arg);
4352 Py_CLEAR(self->last_string);
4353
4354 PyMem_Free(self->marks);
4355 self->marks = NULL;
4356 free(self->encoding);
4357 self->encoding = NULL;
4358 free(self->errors);
4359 self->errors = NULL;
4360
4361 return 0;
4362}
4363
4364PyDoc_STRVAR(Unpickler_doc,
4365"Unpickler(file, *, encoding='ASCII', errors='strict')"
4366"\n"
4367"This takes a binary file for reading a pickle data stream.\n"
4368"\n"
4369"The protocol version of the pickle is detected automatically, so no\n"
4370"proto argument is needed.\n"
4371"\n"
4372"The file-like object must have two methods, a read() method\n"
4373"that takes an integer argument, and a readline() method that\n"
4374"requires no arguments. Both methods should return bytes.\n"
4375"Thus file-like object can be a binary file object opened for\n"
4376"reading, a BytesIO object, or any other custom object that\n"
4377"meets this interface.\n"
4378"\n"
4379"Optional keyword arguments are encoding and errors, which are\n"
4380"used to decode 8-bit string instances pickled by Python 2.x.\n"
4381"These default to 'ASCII' and 'strict', respectively.\n");
4382
4383static int
4384Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
4385{
4386 static char *kwlist[] = {"file", "encoding", "errors", 0};
4387 PyObject *file;
4388 char *encoding = NULL;
4389 char *errors = NULL;
4390
4391 /* XXX: That is an horrible error message. But, I don't know how to do
4392 better... */
4393 if (Py_SIZE(args) != 1) {
4394 PyErr_Format(PyExc_TypeError,
4395 "%s takes exactly one positional argument (%zd given)",
4396 Py_TYPE(self)->tp_name, Py_SIZE(args));
4397 return -1;
4398 }
4399
4400 /* Arguments parsing needs to be done in the __init__() method to allow
4401 subclasses to define their own __init__() method, which may (or may
4402 not) support Unpickler arguments. However, this means we need to be
4403 extra careful in the other Unpickler methods, since a subclass could
4404 forget to call Unpickler.__init__() thus breaking our internal
4405 invariants. */
4406 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|ss:Unpickler", kwlist,
4407 &file, &encoding, &errors))
4408 return -1;
4409
4410 /* In case of multiple __init__() calls, clear previous content. */
4411 if (self->read != NULL)
4412 (void)Unpickler_clear(self);
4413
4414 self->read = PyObject_GetAttrString(file, "read");
4415 self->readline = PyObject_GetAttrString(file, "readline");
4416 if (self->readline == NULL || self->read == NULL)
4417 return -1;
4418
4419 if (encoding == NULL)
4420 encoding = "ASCII";
4421 if (errors == NULL)
4422 errors = "strict";
4423
4424 self->encoding = strdup(encoding);
4425 self->errors = strdup(errors);
4426 if (self->encoding == NULL || self->errors == NULL) {
4427 PyErr_NoMemory();
4428 return -1;
4429 }
4430
4431 if (PyObject_HasAttrString((PyObject *)self, "persistent_load")) {
4432 self->pers_func = PyObject_GetAttrString((PyObject *)self,
4433 "persistent_load");
4434 if (self->pers_func == NULL)
4435 return -1;
4436 }
4437 else {
4438 self->pers_func = NULL;
4439 }
4440
4441 self->stack = (Pdata *)Pdata_New();
4442 if (self->stack == NULL)
4443 return -1;
4444
4445 self->memo = PyDict_New();
4446 if (self->memo == NULL)
4447 return -1;
4448
4449 return 0;
4450}
4451
4452static PyObject *
4453Unpickler_get_memo(UnpicklerObject *self)
4454{
4455 if (self->memo == NULL)
4456 PyErr_SetString(PyExc_AttributeError, "memo");
4457 else
4458 Py_INCREF(self->memo);
4459 return self->memo;
4460}
4461
4462static int
4463Unpickler_set_memo(UnpicklerObject *self, PyObject *value)
4464{
4465 PyObject *tmp;
4466
4467 if (value == NULL) {
4468 PyErr_SetString(PyExc_TypeError,
4469 "attribute deletion is not supported");
4470 return -1;
4471 }
4472 if (!PyDict_Check(value)) {
4473 PyErr_SetString(PyExc_TypeError, "memo must be a dictionary");
4474 return -1;
4475 }
4476
4477 tmp = self->memo;
4478 Py_INCREF(value);
4479 self->memo = value;
4480 Py_XDECREF(tmp);
4481
4482 return 0;
4483}
4484
4485static PyObject *
4486Unpickler_get_persload(UnpicklerObject *self)
4487{
4488 if (self->pers_func == NULL)
4489 PyErr_SetString(PyExc_AttributeError, "persistent_load");
4490 else
4491 Py_INCREF(self->pers_func);
4492 return self->pers_func;
4493}
4494
4495static int
4496Unpickler_set_persload(UnpicklerObject *self, PyObject *value)
4497{
4498 PyObject *tmp;
4499
4500 if (value == NULL) {
4501 PyErr_SetString(PyExc_TypeError,
4502 "attribute deletion is not supported");
4503 return -1;
4504 }
4505 if (!PyCallable_Check(value)) {
4506 PyErr_SetString(PyExc_TypeError,
4507 "persistent_load must be a callable taking "
4508 "one argument");
4509 return -1;
4510 }
4511
4512 tmp = self->pers_func;
4513 Py_INCREF(value);
4514 self->pers_func = value;
4515 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
4516
4517 return 0;
4518}
4519
4520static PyGetSetDef Unpickler_getsets[] = {
4521 {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
4522 {"persistent_load", (getter)Unpickler_get_persload,
4523 (setter)Unpickler_set_persload},
4524 {NULL}
4525};
4526
4527static PyTypeObject Unpickler_Type = {
4528 PyVarObject_HEAD_INIT(NULL, 0)
4529 "_pickle.Unpickler", /*tp_name*/
4530 sizeof(UnpicklerObject), /*tp_basicsize*/
4531 0, /*tp_itemsize*/
4532 (destructor)Unpickler_dealloc, /*tp_dealloc*/
4533 0, /*tp_print*/
4534 0, /*tp_getattr*/
4535 0, /*tp_setattr*/
4536 0, /*tp_compare*/
4537 0, /*tp_repr*/
4538 0, /*tp_as_number*/
4539 0, /*tp_as_sequence*/
4540 0, /*tp_as_mapping*/
4541 0, /*tp_hash*/
4542 0, /*tp_call*/
4543 0, /*tp_str*/
4544 0, /*tp_getattro*/
4545 0, /*tp_setattro*/
4546 0, /*tp_as_buffer*/
4547 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4548 Unpickler_doc, /*tp_doc*/
4549 (traverseproc)Unpickler_traverse, /*tp_traverse*/
4550 (inquiry)Unpickler_clear, /*tp_clear*/
4551 0, /*tp_richcompare*/
4552 0, /*tp_weaklistoffset*/
4553 0, /*tp_iter*/
4554 0, /*tp_iternext*/
4555 Unpickler_methods, /*tp_methods*/
4556 0, /*tp_members*/
4557 Unpickler_getsets, /*tp_getset*/
4558 0, /*tp_base*/
4559 0, /*tp_dict*/
4560 0, /*tp_descr_get*/
4561 0, /*tp_descr_set*/
4562 0, /*tp_dictoffset*/
4563 (initproc)Unpickler_init, /*tp_init*/
4564 PyType_GenericAlloc, /*tp_alloc*/
4565 PyType_GenericNew, /*tp_new*/
4566 PyObject_GC_Del, /*tp_free*/
4567 0, /*tp_is_gc*/
4568};
4569
4570static int
4571init_stuff(void)
4572{
4573 PyObject *copyreg;
4574
4575 copyreg = PyImport_ImportModule("copyreg");
4576 if (!copyreg)
4577 return -1;
4578
4579 dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
4580 if (!dispatch_table)
4581 goto error;
4582
4583 extension_registry = \
4584 PyObject_GetAttrString(copyreg, "_extension_registry");
4585 if (!extension_registry)
4586 goto error;
4587
4588 inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry");
4589 if (!inverted_registry)
4590 goto error;
4591
4592 extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
4593 if (!extension_cache)
4594 goto error;
4595
4596 Py_DECREF(copyreg);
4597
4598 empty_tuple = PyTuple_New(0);
4599 if (empty_tuple == NULL)
4600 return -1;
4601
4602 two_tuple = PyTuple_New(2);
4603 if (two_tuple == NULL)
4604 return -1;
4605 /* We use this temp container with no regard to refcounts, or to
4606 * keeping containees alive. Exempt from GC, because we don't
4607 * want anything looking at two_tuple() by magic.
4608 */
4609 PyObject_GC_UnTrack(two_tuple);
4610
4611 return 0;
4612
4613 error:
4614 Py_DECREF(copyreg);
4615 return -1;
4616}
4617
4618static struct PyModuleDef _picklemodule = {
4619 PyModuleDef_HEAD_INIT,
4620 "_pickle",
4621 pickle_module_doc,
4622 -1,
4623 NULL,
4624 NULL,
4625 NULL,
4626 NULL,
4627 NULL
4628};
4629
4630PyMODINIT_FUNC
4631PyInit__pickle(void)
4632{
4633 PyObject *m;
4634
4635 if (PyType_Ready(&Unpickler_Type) < 0)
4636 return NULL;
4637 if (PyType_Ready(&Pickler_Type) < 0)
4638 return NULL;
4639 if (PyType_Ready(&Pdata_Type) < 0)
4640 return NULL;
4641
4642 /* Create the module and add the functions. */
4643 m = PyModule_Create(&_picklemodule);
4644 if (m == NULL)
4645 return NULL;
4646
4647 if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
4648 return NULL;
4649 if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
4650 return NULL;
4651
4652 /* Initialize the exceptions. */
4653 PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
4654 if (PickleError == NULL)
4655 return NULL;
4656 PicklingError = \
4657 PyErr_NewException("_pickle.PicklingError", PickleError, NULL);
4658 if (PicklingError == NULL)
4659 return NULL;
4660 UnpicklingError = \
4661 PyErr_NewException("_pickle.UnpicklingError", PickleError, NULL);
4662 if (UnpicklingError == NULL)
4663 return NULL;
4664
4665 if (PyModule_AddObject(m, "PickleError", PickleError) < 0)
4666 return NULL;
4667 if (PyModule_AddObject(m, "PicklingError", PicklingError) < 0)
4668 return NULL;
4669 if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0)
4670 return NULL;
4671
4672 if (init_stuff() < 0)
4673 return NULL;
4674
4675 return m;
4676}