blob: 3ad55b587ff59eb3f9881120082f251ad49d0034 [file] [log] [blame]
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001#include "Python.h"
2#include "structmember.h"
3
4PyDoc_STRVAR(pickle_module_doc,
5"Optimized C implementation for the Python pickle module.");
6
/* Pickle protocol version numbers.  Both values must be bumped whenever new
   opcodes are added to the pickle protocol. */
enum {
    HIGHEST_PROTOCOL = 3,
    DEFAULT_PROTOCOL = 3
};
12
13
/* Pickle opcodes, one byte each.  This table has to stay in step with
   pickle.py; pickletools.py carries the extensive documentation. */
enum opcode {
    MARK            = '(',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',
    FLOAT           = 'F',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    LONG            = 'L',
    BININT2         = 'M',
    NONE            = 'N',
    PERSID          = 'P',
    BINPERSID       = 'Q',
    REDUCE          = 'R',
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',
    APPEND          = 'a',
    BUILD           = 'b',
    GLOBAL          = 'c',
    DICT            = 'd',
    EMPTY_DICT      = '}',
    APPENDS         = 'e',
    GET             = 'g',
    BINGET          = 'h',
    INST            = 'i',
    LONG_BINGET     = 'j',
    LIST            = 'l',
    EMPTY_LIST      = ']',
    OBJ             = 'o',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',
    SETITEM         = 's',
    TUPLE           = 't',
    EMPTY_TUPLE     = ')',
    SETITEMS        = 'u',
    BINFLOAT        = 'G',

    /* Opcodes introduced by protocol 2. */
    PROTO           = '\x80',
    NEWOBJ          = '\x81',
    EXT1            = '\x82',
    EXT2            = '\x83',
    EXT4            = '\x84',
    TUPLE1          = '\x85',
    TUPLE2          = '\x86',
    TUPLE3          = '\x87',
    NEWTRUE         = '\x88',
    NEWFALSE        = '\x89',
    LONG1           = '\x8a',
    LONG4           = '\x8b',

    /* Opcodes introduced by protocol 3 (Python 3.x). */
    BINBYTES        = 'B',
    SHORT_BINBYTES  = 'C',
};
77
/* TRUE and FALSE are not opcodes.  They are the textual INT forms used to
 * pickle bools before protocol 2: unpicklers that predate bools read them
 * back as ints, while newer unpicklers can tell that bools were intended.
 * Protocol 2 added direct ways to pickle bools.
 */
#undef FALSE
#undef TRUE
#define FALSE "I00\n"
#define TRUE "I01\n"
87
/* Tuning constants. */
enum {
    /* Number of elements batch_list/dict() emit before issuing an
       APPENDS/SETITEMS opcode.  Mirrors pickle.Pickler._BATCHSIZE; nothing
       breaks if the two values drift apart, but there is no known benefit
       in letting them differ either. */
    BATCHSIZE = 1000,

    /* In "fast mode", the nesting depth at which the Pickler starts
       checking for self-referential data structures. */
    FAST_NESTING_LIMIT = 50,

    /* Size of the Pickler's write buffer.  Larger values mean fewer calls
       to the output stream's write() method. */
    WRITE_BUF_SIZE = 256,
};
103
104/* Exception classes for pickle. These should override the ones defined in
105 pickle.py, when the C-optimized Pickler and Unpickler are used. */
106static PyObject *PickleError;
107static PyObject *PicklingError;
108static PyObject *UnpicklingError;
109
110/* copyreg.dispatch_table, {type_object: pickling_function} */
111static PyObject *dispatch_table;
112/* For EXT[124] opcodes. */
113/* copyreg._extension_registry, {(module_name, function_name): code} */
114static PyObject *extension_registry;
115/* copyreg._inverted_registry, {code: (module_name, function_name)} */
116static PyObject *inverted_registry;
117/* copyreg._extension_cache, {code: object} */
118static PyObject *extension_cache;
119
120/* XXX: Are these really nescessary? */
121/* As the name says, an empty tuple. */
122static PyObject *empty_tuple;
123/* For looking up name pairs in copyreg._extension_registry. */
124static PyObject *two_tuple;
125
126static int
127stack_underflow(void)
128{
129 PyErr_SetString(UnpicklingError, "unpickling stack underflow");
130 return -1;
131}
132
133/* Internal data type used as the unpickling stack. */
134typedef struct {
135 PyObject_HEAD
136 int length; /* number of initial slots in data currently used */
137 int size; /* number of slots in data allocated */
138 PyObject **data;
139} Pdata;
140
141static void
142Pdata_dealloc(Pdata *self)
143{
144 int i;
145 PyObject **p;
146
147 for (i = self->length, p = self->data; --i >= 0; p++) {
148 Py_DECREF(*p);
149 }
150 if (self->data)
151 PyMem_Free(self->data);
152 PyObject_Del(self);
153}
154
155static PyTypeObject Pdata_Type = {
156 PyVarObject_HEAD_INIT(NULL, 0)
157 "_pickle.Pdata", /*tp_name*/
158 sizeof(Pdata), /*tp_basicsize*/
159 0, /*tp_itemsize*/
160 (destructor)Pdata_dealloc, /*tp_dealloc*/
161};
162
163static PyObject *
164Pdata_New(void)
165{
166 Pdata *self;
167
168 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
169 return NULL;
170 self->size = 8;
171 self->length = 0;
172 self->data = PyMem_Malloc(self->size * sizeof(PyObject *));
173 if (self->data)
174 return (PyObject *)self;
175 Py_DECREF(self);
176 return PyErr_NoMemory();
177}
178
179
180/* Retain only the initial clearto items. If clearto >= the current
181 * number of items, this is a (non-erroneous) NOP.
182 */
183static int
184Pdata_clear(Pdata *self, int clearto)
185{
186 int i;
187 PyObject **p;
188
189 if (clearto < 0)
190 return stack_underflow();
191 if (clearto >= self->length)
192 return 0;
193
194 for (i = self->length, p = self->data + clearto; --i >= clearto; p++) {
195 Py_CLEAR(*p);
196 }
197 self->length = clearto;
198
199 return 0;
200}
201
202static int
203Pdata_grow(Pdata *self)
204{
205 int bigger;
206 size_t nbytes;
207 PyObject **tmp;
208
209 bigger = (self->size << 1) + 1;
210 if (bigger <= 0) /* was 0, or new value overflows */
211 goto nomemory;
212 if ((int)(size_t)bigger != bigger)
213 goto nomemory;
214 nbytes = (size_t)bigger * sizeof(PyObject *);
215 if (nbytes / sizeof(PyObject *) != (size_t)bigger)
216 goto nomemory;
217 tmp = PyMem_Realloc(self->data, nbytes);
218 if (tmp == NULL)
219 goto nomemory;
220 self->data = tmp;
221 self->size = bigger;
222 return 0;
223
224 nomemory:
225 PyErr_NoMemory();
226 return -1;
227}
228
229/* D is a Pdata*. Pop the topmost element and store it into V, which
230 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
231 * is raised and V is set to NULL.
232 */
233static PyObject *
234Pdata_pop(Pdata *self)
235{
236 if (self->length == 0) {
237 PyErr_SetString(UnpicklingError, "bad pickle data");
238 return NULL;
239 }
240 return self->data[--(self->length)];
241}
242#define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
243
244static int
245Pdata_push(Pdata *self, PyObject *obj)
246{
247 if (self->length == self->size && Pdata_grow(self) < 0) {
248 return -1;
249 }
250 self->data[self->length++] = obj;
251 return 0;
252}
253
/* Push an object on stack, transferring its ownership to the stack.
   On failure the enclosing function returns ER; the object is NOT released
   here.  Note that the arguments are evaluated more than once. */
#define PDATA_PUSH(D, O, ER) do {                               \
        if (Pdata_push((D), (O)) < 0) return (ER); } while(0)

/* Push an object on stack, adding a new reference to the object.
   On failure the reference taken here is dropped again before the enclosing
   function returns ER, so the caller's reference count is left unchanged.
   Note that the arguments are evaluated more than once. */
#define PDATA_APPEND(D, O, ER) do {                             \
        Py_INCREF((O));                                         \
        if (Pdata_push((D), (O)) < 0) {                         \
            Py_DECREF((O));                                     \
            return (ER);                                        \
        }                                                       \
    } while(0)
262
263static PyObject *
264Pdata_poptuple(Pdata *self, Py_ssize_t start)
265{
266 PyObject *tuple;
267 Py_ssize_t len, i, j;
268
269 len = self->length - start;
270 tuple = PyTuple_New(len);
271 if (tuple == NULL)
272 return NULL;
273 for (i = start, j = 0; j < len; i++, j++)
274 PyTuple_SET_ITEM(tuple, j, self->data[i]);
275
276 self->length = start;
277 return tuple;
278}
279
280static PyObject *
281Pdata_poplist(Pdata *self, Py_ssize_t start)
282{
283 PyObject *list;
284 Py_ssize_t len, i, j;
285
286 len = self->length - start;
287 list = PyList_New(len);
288 if (list == NULL)
289 return NULL;
290 for (i = start, j = 0; j < len; i++, j++)
291 PyList_SET_ITEM(list, j, self->data[i]);
292
293 self->length = start;
294 return list;
295}
296
297typedef struct PicklerObject {
298 PyObject_HEAD
299 PyObject *write; /* write() method of the output stream */
300 PyObject *memo; /* Memo dictionary, keep track of the seen
301 objects to support self-referential objects
302 pickling. */
303 PyObject *pers_func; /* persistent_id() method, can be NULL */
304 PyObject *arg;
305 int proto; /* Pickle protocol number, >= 0 */
306 int bin; /* Boolean, true if proto > 0 */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000307 int buf_size; /* Size of the current buffered pickle data */
308 char *write_buf; /* Write buffer, this is to avoid calling the
309 write() method of the output stream too
310 often. */
311 int fast; /* Enable fast mode if set to a true value.
312 The fast mode disable the usage of memo,
313 therefore speeding the pickling process by
314 not generating superfluous PUT opcodes. It
315 should not be used if with self-referential
316 objects. */
317 int fast_nesting;
318 PyObject *fast_memo;
319} PicklerObject;
320
321typedef struct UnpicklerObject {
322 PyObject_HEAD
323 Pdata *stack; /* Pickle data stack, store unpickled objects. */
324 PyObject *readline; /* readline() method of the output stream */
325 PyObject *read; /* read() method of the output stream */
326 PyObject *memo; /* Memo dictionary, provide the objects stored
327 using the PUT opcodes. */
328 PyObject *arg;
329 PyObject *pers_func; /* persistent_load() method, can be NULL. */
330 PyObject *last_string; /* Reference to the last string read by the
331 readline() method. */
332 char *buffer; /* Reading buffer. */
333 char *encoding; /* Name of the encoding to be used for
334 decoding strings pickled using Python
335 2.x. The default value is "ASCII" */
336 char *errors; /* Name of errors handling scheme to used when
337 decoding strings. The default value is
338 "strict". */
339 int *marks; /* Mark stack, used for unpickling container
340 objects. */
341 Py_ssize_t num_marks; /* Number of marks in the mark stack. */
342 Py_ssize_t marks_size; /* Current allocated size of the mark stack. */
343} UnpicklerObject;
344
345/* Forward declarations */
346static int save(PicklerObject *, PyObject *, int);
347static int save_reduce(PicklerObject *, PyObject *, PyObject *);
348static PyTypeObject Pickler_Type;
349static PyTypeObject Unpickler_Type;
350
351
/* Helpers managing the cached one-element argument tuple (self->arg) that
   is passed to called functions.  This has the performance advantage of
   calling PyTuple_New() only once.

   ARG_TUP stores `obj` in slot 0 without taking a new reference, releasing
   whatever was stored there before.  If the tuple cannot be created, `obj`
   is released and self->arg stays NULL.

   FREE_ARG_TUP discards the cached tuple when something else still holds a
   reference to it (refcount above 1). */

#define ARG_TUP(self, obj) do {                                 \
        if ((self)->arg == NULL)                                \
            (self)->arg = PyTuple_New(1);                       \
        if ((self)->arg != NULL) {                              \
            Py_XDECREF(PyTuple_GET_ITEM((self)->arg, 0));       \
            PyTuple_SET_ITEM((self)->arg, 0, (obj));            \
        }                                                       \
        else {                                                  \
            Py_DECREF((obj));                                   \
        }                                                       \
    } while (0)

#define FREE_ARG_TUP(self) do {                                 \
        if ((self)->arg->ob_refcnt > 1) {                       \
            Py_CLEAR((self)->arg);                              \
        }                                                       \
    } while (0)
369
370/* A temporary cleaner API for fast single argument function call.
371
372 XXX: Does caching the argument tuple provides any real performance benefits?
373
374 A quick benchmark, on a 2.0GHz Athlon64 3200+ running Linux 2.6.24 with
375 glibc 2.7, tells me that it takes roughly 20,000,000 PyTuple_New(1) calls
376 when the tuple is retrieved from the freelist (i.e, call PyTuple_New() then
377 immediately DECREF it) and 1,200,000 calls when allocating brand new tuples
378 (i.e, call PyTuple_New() and store the returned value in an array), to save
379 one second (wall clock time). Either ways, the loading time a pickle stream
380 large enough to generate this number of calls would be massively
381 overwhelmed by other factors, like I/O throughput, the GC traversal and
382 object allocation overhead. So, I really doubt these functions provide any
383 real benefits.
384
385 On the other hand, oprofile reports that pickle spends a lot of time in
386 these functions. But, that is probably more related to the function call
387 overhead, than the argument tuple allocation.
388
389 XXX: And, what is the reference behavior of these? Steal, borrow? At first
390 glance, it seems to steal the reference of 'arg' and borrow the reference
391 of 'func'.
392 */
393static PyObject *
394pickler_call(PicklerObject *self, PyObject *func, PyObject *arg)
395{
396 PyObject *result = NULL;
397
398 ARG_TUP(self, arg);
399 if (self->arg) {
400 result = PyObject_Call(func, self->arg, NULL);
401 FREE_ARG_TUP(self);
402 }
403 return result;
404}
405
406static PyObject *
407unpickler_call(UnpicklerObject *self, PyObject *func, PyObject *arg)
408{
409 PyObject *result = NULL;
410
411 ARG_TUP(self, arg);
412 if (self->arg) {
413 result = PyObject_Call(func, self->arg, NULL);
414 FREE_ARG_TUP(self);
415 }
416 return result;
417}
418
419static Py_ssize_t
420pickler_write(PicklerObject *self, const char *s, Py_ssize_t n)
421{
422 PyObject *data, *result;
423
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000424 if (self->write_buf == NULL) {
425 PyErr_SetString(PyExc_SystemError, "invalid write buffer");
426 return -1;
427 }
428
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000429 if (s == NULL) {
430 if (!(self->buf_size))
431 return 0;
432 data = PyBytes_FromStringAndSize(self->write_buf, self->buf_size);
433 if (data == NULL)
434 return -1;
435 }
436 else {
437 if (self->buf_size && (n + self->buf_size) > WRITE_BUF_SIZE) {
438 if (pickler_write(self, NULL, 0) < 0)
439 return -1;
440 }
441
442 if (n > WRITE_BUF_SIZE) {
443 if (!(data = PyBytes_FromStringAndSize(s, n)))
444 return -1;
445 }
446 else {
447 memcpy(self->write_buf + self->buf_size, s, n);
448 self->buf_size += n;
449 return n;
450 }
451 }
452
453 /* object with write method */
454 result = pickler_call(self, self->write, data);
455 if (result == NULL)
456 return -1;
457
458 Py_DECREF(result);
459 self->buf_size = 0;
460 return n;
461}
462
463/* XXX: These read/readline functions ought to be optimized. Buffered I/O
464 might help a lot, especially with the new (but much slower) io library.
465 On the other hand, the added complexity might not worth it.
466 */
467
468/* Read at least n characters from the input stream and set s to the current
469 reading position. */
470static Py_ssize_t
471unpickler_read(UnpicklerObject *self, char **s, Py_ssize_t n)
472{
473 PyObject *len;
474 PyObject *data;
475
476 len = PyLong_FromSsize_t(n);
477 if (len == NULL)
478 return -1;
479
480 data = unpickler_call(self, self->read, len);
481 if (data == NULL)
482 return -1;
483
484 /* XXX: Should bytearray be supported too? */
485 if (!PyBytes_Check(data)) {
486 PyErr_SetString(PyExc_ValueError,
487 "read() from the underlying stream did not"
488 "return bytes");
Amaury Forgeot d'Arc5f952572008-11-25 21:11:54 +0000489 Py_DECREF(data);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000490 return -1;
491 }
492
Amaury Forgeot d'Arc3e4e72f2008-11-11 20:05:06 +0000493 if (PyBytes_GET_SIZE(data) != n) {
494 PyErr_SetNone(PyExc_EOFError);
Amaury Forgeot d'Arc5f952572008-11-25 21:11:54 +0000495 Py_DECREF(data);
Amaury Forgeot d'Arc3e4e72f2008-11-11 20:05:06 +0000496 return -1;
497 }
498
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000499 Py_XDECREF(self->last_string);
500 self->last_string = data;
501
502 if (!(*s = PyBytes_AS_STRING(data)))
503 return -1;
504
505 return n;
506}
507
508static Py_ssize_t
509unpickler_readline(UnpicklerObject *self, char **s)
510{
511 PyObject *data;
512
513 data = PyObject_CallObject(self->readline, empty_tuple);
514 if (data == NULL)
515 return -1;
516
517 /* XXX: Should bytearray be supported too? */
518 if (!PyBytes_Check(data)) {
519 PyErr_SetString(PyExc_ValueError,
520 "readline() from the underlying stream did not"
521 "return bytes");
522 return -1;
523 }
524
525 Py_XDECREF(self->last_string);
526 self->last_string = data;
527
528 if (!(*s = PyBytes_AS_STRING(data)))
529 return -1;
530
531 return PyBytes_GET_SIZE(data);
532}
533
534/* Generate a GET opcode for an object stored in the memo. The 'key' argument
535 should be the address of the object as returned by PyLong_FromVoidPtr(). */
536static int
537memo_get(PicklerObject *self, PyObject *key)
538{
539 PyObject *value;
540 PyObject *memo_id;
541 long x;
542 char pdata[30];
543 int len;
544
545 value = PyDict_GetItemWithError(self->memo, key);
546 if (value == NULL) {
547 if (!PyErr_Occurred())
548 PyErr_SetObject(PyExc_KeyError, key);
549 return -1;
550 }
551
552 memo_id = PyTuple_GetItem(value, 0);
553 if (memo_id == NULL)
554 return -1;
555
556 if (!PyLong_Check(memo_id)) {
557 PyErr_SetString(PicklingError, "memo id must be an integer");
558 return -1;
559 }
560 x = PyLong_AsLong(memo_id);
561 if (x == -1 && PyErr_Occurred())
562 return -1;
563
564 if (!self->bin) {
565 pdata[0] = GET;
566 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
567 len = (int)strlen(pdata);
568 }
569 else {
570 if (x < 256) {
571 pdata[0] = BINGET;
572 pdata[1] = (unsigned char)(x & 0xff);
573 len = 2;
574 }
575 else if (x <= 0xffffffffL) {
576 pdata[0] = LONG_BINGET;
577 pdata[1] = (unsigned char)(x & 0xff);
578 pdata[2] = (unsigned char)((x >> 8) & 0xff);
579 pdata[3] = (unsigned char)((x >> 16) & 0xff);
580 pdata[4] = (unsigned char)((x >> 24) & 0xff);
581 len = 5;
582 }
583 else { /* unlikely */
584 PyErr_SetString(PicklingError,
585 "memo id too large for LONG_BINGET");
586 return -1;
587 }
588 }
589
590 if (pickler_write(self, pdata, len) < 0)
591 return -1;
592
593 return 0;
594}
595
596/* Store an object in the memo, assign it a new unique ID based on the number
597 of objects currently stored in the memo and generate a PUT opcode. */
598static int
599memo_put(PicklerObject *self, PyObject *obj)
600{
601 PyObject *key = NULL;
602 PyObject *memo_id = NULL;
603 PyObject *tuple = NULL;
604 long x;
605 char pdata[30];
606 int len;
607 int status = 0;
608
609 if (self->fast)
610 return 0;
611
612 key = PyLong_FromVoidPtr(obj);
613 if (key == NULL)
614 goto error;
615 if ((x = PyDict_Size(self->memo)) < 0)
616 goto error;
617 memo_id = PyLong_FromLong(x);
618 if (memo_id == NULL)
619 goto error;
620 tuple = PyTuple_New(2);
621 if (tuple == NULL)
622 goto error;
623
624 Py_INCREF(memo_id);
625 PyTuple_SET_ITEM(tuple, 0, memo_id);
626 Py_INCREF(obj);
627 PyTuple_SET_ITEM(tuple, 1, obj);
628 if (PyDict_SetItem(self->memo, key, tuple) < 0)
629 goto error;
630
631 if (!self->bin) {
632 pdata[0] = PUT;
633 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
634 len = strlen(pdata);
635 }
636 else {
637 if (x < 256) {
638 pdata[0] = BINPUT;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +0000639 pdata[1] = (unsigned char)x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000640 len = 2;
641 }
642 else if (x <= 0xffffffffL) {
643 pdata[0] = LONG_BINPUT;
644 pdata[1] = (unsigned char)(x & 0xff);
645 pdata[2] = (unsigned char)((x >> 8) & 0xff);
646 pdata[3] = (unsigned char)((x >> 16) & 0xff);
647 pdata[4] = (unsigned char)((x >> 24) & 0xff);
648 len = 5;
649 }
650 else { /* unlikely */
651 PyErr_SetString(PicklingError,
652 "memo id too large for LONG_BINPUT");
653 return -1;
654 }
655 }
656
657 if (pickler_write(self, pdata, len) < 0)
658 goto error;
659
660 if (0) {
661 error:
662 status = -1;
663 }
664
665 Py_XDECREF(key);
666 Py_XDECREF(memo_id);
667 Py_XDECREF(tuple);
668
669 return status;
670}
671
672static PyObject *
673whichmodule(PyObject *global, PyObject *global_name)
674{
675 Py_ssize_t i, j;
676 static PyObject *module_str = NULL;
677 static PyObject *main_str = NULL;
678 PyObject *module_name;
679 PyObject *modules_dict;
680 PyObject *module;
681 PyObject *obj;
682
683 if (module_str == NULL) {
684 module_str = PyUnicode_InternFromString("__module__");
685 if (module_str == NULL)
686 return NULL;
687 main_str = PyUnicode_InternFromString("__main__");
688 if (main_str == NULL)
689 return NULL;
690 }
691
692 module_name = PyObject_GetAttr(global, module_str);
693
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +0000694 /* In some rare cases (e.g., bound methods of extension types),
695 __module__ can be None. If it is so, then search sys.modules
696 for the module of global. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000697 if (module_name == Py_None) {
698 Py_DECREF(module_name);
699 goto search;
700 }
701
702 if (module_name) {
703 return module_name;
704 }
705 if (PyErr_ExceptionMatches(PyExc_AttributeError))
706 PyErr_Clear();
707 else
708 return NULL;
709
710 search:
711 modules_dict = PySys_GetObject("modules");
712 if (modules_dict == NULL)
713 return NULL;
714
715 i = 0;
716 module_name = NULL;
717 while ((j = PyDict_Next(modules_dict, &i, &module_name, &module))) {
Mark Dickinson211c6252009-02-01 10:28:51 +0000718 if (PyObject_RichCompareBool(module_name, main_str, Py_EQ) == 1)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000719 continue;
720
721 obj = PyObject_GetAttr(module, global_name);
722 if (obj == NULL) {
723 if (PyErr_ExceptionMatches(PyExc_AttributeError))
724 PyErr_Clear();
725 else
726 return NULL;
727 continue;
728 }
729
730 if (obj != global) {
731 Py_DECREF(obj);
732 continue;
733 }
734
735 Py_DECREF(obj);
736 break;
737 }
738
739 /* If no module is found, use __main__. */
740 if (!j) {
741 module_name = main_str;
742 }
743
744 Py_INCREF(module_name);
745 return module_name;
746}
747
748/* fast_save_enter() and fast_save_leave() are guards against recursive
749 objects when Pickler is used with the "fast mode" (i.e., with object
750 memoization disabled). If the nesting of a list or dict object exceed
751 FAST_NESTING_LIMIT, these guards will start keeping an internal
752 reference to the seen list or dict objects and check whether these objects
753 are recursive. These are not strictly necessary, since save() has a
754 hard-coded recursion limit, but they give a nicer error message than the
755 typical RuntimeError. */
756static int
757fast_save_enter(PicklerObject *self, PyObject *obj)
758{
759 /* if fast_nesting < 0, we're doing an error exit. */
760 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
761 PyObject *key = NULL;
762 if (self->fast_memo == NULL) {
763 self->fast_memo = PyDict_New();
764 if (self->fast_memo == NULL) {
765 self->fast_nesting = -1;
766 return 0;
767 }
768 }
769 key = PyLong_FromVoidPtr(obj);
770 if (key == NULL)
771 return 0;
772 if (PyDict_GetItem(self->fast_memo, key)) {
773 Py_DECREF(key);
774 PyErr_Format(PyExc_ValueError,
775 "fast mode: can't pickle cyclic objects "
776 "including object type %.200s at %p",
777 obj->ob_type->tp_name, obj);
778 self->fast_nesting = -1;
779 return 0;
780 }
781 if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
782 Py_DECREF(key);
783 self->fast_nesting = -1;
784 return 0;
785 }
786 Py_DECREF(key);
787 }
788 return 1;
789}
790
791static int
792fast_save_leave(PicklerObject *self, PyObject *obj)
793{
794 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
795 PyObject *key = PyLong_FromVoidPtr(obj);
796 if (key == NULL)
797 return 0;
798 if (PyDict_DelItem(self->fast_memo, key) < 0) {
799 Py_DECREF(key);
800 return 0;
801 }
802 Py_DECREF(key);
803 }
804 return 1;
805}
806
807static int
808save_none(PicklerObject *self, PyObject *obj)
809{
810 const char none_op = NONE;
811 if (pickler_write(self, &none_op, 1) < 0)
812 return -1;
813
814 return 0;
815}
816
817static int
818save_bool(PicklerObject *self, PyObject *obj)
819{
820 static const char *buf[2] = { FALSE, TRUE };
821 const char len[2] = {sizeof(FALSE) - 1, sizeof(TRUE) - 1};
822 int p = (obj == Py_True);
823
824 if (self->proto >= 2) {
825 const char bool_op = p ? NEWTRUE : NEWFALSE;
826 if (pickler_write(self, &bool_op, 1) < 0)
827 return -1;
828 }
829 else if (pickler_write(self, buf[p], len[p]) < 0)
830 return -1;
831
832 return 0;
833}
834
835static int
836save_int(PicklerObject *self, long x)
837{
838 char pdata[32];
839 int len = 0;
840
841 if (!self->bin
842#if SIZEOF_LONG > 4
843 || x > 0x7fffffffL || x < -0x80000000L
844#endif
845 ) {
846 /* Text-mode pickle, or long too big to fit in the 4-byte
847 * signed BININT format: store as a string.
848 */
Mark Dickinson8dd05142009-01-20 20:43:58 +0000849 pdata[0] = LONG; /* use LONG for consistency with pickle.py */
850 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ldL\n", x);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000851 if (pickler_write(self, pdata, strlen(pdata)) < 0)
852 return -1;
853 }
854 else {
855 /* Binary pickle and x fits in a signed 4-byte int. */
856 pdata[1] = (unsigned char)(x & 0xff);
857 pdata[2] = (unsigned char)((x >> 8) & 0xff);
858 pdata[3] = (unsigned char)((x >> 16) & 0xff);
859 pdata[4] = (unsigned char)((x >> 24) & 0xff);
860
861 if ((pdata[4] == 0) && (pdata[3] == 0)) {
862 if (pdata[2] == 0) {
863 pdata[0] = BININT1;
864 len = 2;
865 }
866 else {
867 pdata[0] = BININT2;
868 len = 3;
869 }
870 }
871 else {
872 pdata[0] = BININT;
873 len = 5;
874 }
875
876 if (pickler_write(self, pdata, len) < 0)
877 return -1;
878 }
879
880 return 0;
881}
882
883static int
884save_long(PicklerObject *self, PyObject *obj)
885{
886 PyObject *repr = NULL;
887 Py_ssize_t size;
888 long val = PyLong_AsLong(obj);
889 int status = 0;
890
891 const char long_op = LONG;
892
893 if (val == -1 && PyErr_Occurred()) {
894 /* out of range for int pickling */
895 PyErr_Clear();
896 }
897 else
898 return save_int(self, val);
899
900 if (self->proto >= 2) {
901 /* Linear-time pickling. */
902 size_t nbits;
903 size_t nbytes;
904 unsigned char *pdata;
905 char header[5];
906 int i;
907 int sign = _PyLong_Sign(obj);
908
909 if (sign == 0) {
910 header[0] = LONG1;
911 header[1] = 0; /* It's 0 -- an empty bytestring. */
912 if (pickler_write(self, header, 2) < 0)
913 goto error;
914 return 0;
915 }
916 nbits = _PyLong_NumBits(obj);
917 if (nbits == (size_t)-1 && PyErr_Occurred())
918 goto error;
919 /* How many bytes do we need? There are nbits >> 3 full
920 * bytes of data, and nbits & 7 leftover bits. If there
921 * are any leftover bits, then we clearly need another
922 * byte. Wnat's not so obvious is that we *probably*
923 * need another byte even if there aren't any leftovers:
924 * the most-significant bit of the most-significant byte
925 * acts like a sign bit, and it's usually got a sense
926 * opposite of the one we need. The exception is longs
927 * of the form -(2**(8*j-1)) for j > 0. Such a long is
928 * its own 256's-complement, so has the right sign bit
929 * even without the extra byte. That's a pain to check
930 * for in advance, though, so we always grab an extra
931 * byte at the start, and cut it back later if possible.
932 */
933 nbytes = (nbits >> 3) + 1;
934 if (nbytes > INT_MAX) {
935 PyErr_SetString(PyExc_OverflowError,
936 "long too large to pickle");
937 goto error;
938 }
Neal Norwitz6ae2eb22008-08-24 23:50:08 +0000939 repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000940 if (repr == NULL)
941 goto error;
Neal Norwitz6ae2eb22008-08-24 23:50:08 +0000942 pdata = (unsigned char *)PyBytes_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000943 i = _PyLong_AsByteArray((PyLongObject *)obj,
944 pdata, nbytes,
945 1 /* little endian */ , 1 /* signed */ );
946 if (i < 0)
947 goto error;
948 /* If the long is negative, this may be a byte more than
949 * needed. This is so iff the MSB is all redundant sign
950 * bits.
951 */
952 if (sign < 0 &&
953 nbytes > 1 &&
954 pdata[nbytes - 1] == 0xff &&
955 (pdata[nbytes - 2] & 0x80) != 0) {
956 nbytes--;
957 }
958
959 if (nbytes < 256) {
960 header[0] = LONG1;
961 header[1] = (unsigned char)nbytes;
962 size = 2;
963 }
964 else {
965 header[0] = LONG4;
966 size = (int)nbytes;
967 for (i = 1; i < 5; i++) {
968 header[i] = (unsigned char)(size & 0xff);
969 size >>= 8;
970 }
971 size = 5;
972 }
973 if (pickler_write(self, header, size) < 0 ||
974 pickler_write(self, (char *)pdata, (int)nbytes) < 0)
975 goto error;
976 }
977 else {
978 char *string;
979
Mark Dickinson8dd05142009-01-20 20:43:58 +0000980 /* proto < 2: write the repr and newline. This is quadratic-time (in
981 the number of digits), in both directions. We add a trailing 'L'
982 to the repr, for compatibility with Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000983
984 repr = PyObject_Repr(obj);
985 if (repr == NULL)
986 goto error;
987
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +0000988 string = _PyUnicode_AsStringAndSize(repr, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000989 if (string == NULL)
990 goto error;
991
992 if (pickler_write(self, &long_op, 1) < 0 ||
993 pickler_write(self, string, size) < 0 ||
Mark Dickinson8dd05142009-01-20 20:43:58 +0000994 pickler_write(self, "L\n", 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000995 goto error;
996 }
997
998 if (0) {
999 error:
1000 status = -1;
1001 }
1002 Py_XDECREF(repr);
1003
1004 return status;
1005}
1006
1007static int
1008save_float(PicklerObject *self, PyObject *obj)
1009{
1010 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
1011
1012 if (self->bin) {
1013 char pdata[9];
1014 pdata[0] = BINFLOAT;
1015 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
1016 return -1;
1017 if (pickler_write(self, pdata, 9) < 0)
1018 return -1;
Eric Smith0923d1d2009-04-16 20:16:10 +00001019 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001020 else {
Eric Smith0923d1d2009-04-16 20:16:10 +00001021 int result = -1;
1022 char *buf = NULL;
1023 char op = FLOAT;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001024
Eric Smith0923d1d2009-04-16 20:16:10 +00001025 if (pickler_write(self, &op, 1) < 0)
1026 goto done;
1027
Mark Dickinson3e09f432009-04-17 08:41:23 +00001028 buf = PyOS_double_to_string(x, 'g', 17, 0, NULL);
Eric Smith0923d1d2009-04-16 20:16:10 +00001029 if (!buf) {
1030 PyErr_NoMemory();
1031 goto done;
1032 }
1033
1034 if (pickler_write(self, buf, strlen(buf)) < 0)
1035 goto done;
1036
1037 if (pickler_write(self, "\n", 1) < 0)
1038 goto done;
1039
1040 result = 0;
1041done:
1042 PyMem_Free(buf);
1043 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001044 }
1045
1046 return 0;
1047}
1048
1049static int
1050save_bytes(PicklerObject *self, PyObject *obj)
1051{
1052 if (self->proto < 3) {
1053 /* Older pickle protocols do not have an opcode for pickling bytes
1054 objects. Therefore, we need to fake the copy protocol (i.e.,
1055 the __reduce__ method) to permit bytes object unpickling. */
1056 PyObject *reduce_value = NULL;
1057 PyObject *bytelist = NULL;
1058 int status;
1059
1060 bytelist = PySequence_List(obj);
1061 if (bytelist == NULL)
1062 return -1;
1063
1064 reduce_value = Py_BuildValue("(O(O))", (PyObject *)&PyBytes_Type,
1065 bytelist);
1066 if (reduce_value == NULL) {
1067 Py_DECREF(bytelist);
1068 return -1;
1069 }
1070
1071 /* save_reduce() will memoize the object automatically. */
1072 status = save_reduce(self, reduce_value, obj);
1073 Py_DECREF(reduce_value);
1074 Py_DECREF(bytelist);
1075 return status;
1076 }
1077 else {
1078 Py_ssize_t size;
1079 char header[5];
1080 int len;
1081
1082 size = PyBytes_Size(obj);
1083 if (size < 0)
1084 return -1;
1085
1086 if (size < 256) {
1087 header[0] = SHORT_BINBYTES;
1088 header[1] = (unsigned char)size;
1089 len = 2;
1090 }
1091 else if (size <= 0xffffffffL) {
1092 header[0] = BINBYTES;
1093 header[1] = (unsigned char)(size & 0xff);
1094 header[2] = (unsigned char)((size >> 8) & 0xff);
1095 header[3] = (unsigned char)((size >> 16) & 0xff);
1096 header[4] = (unsigned char)((size >> 24) & 0xff);
1097 len = 5;
1098 }
1099 else {
1100 return -1; /* string too large */
1101 }
1102
1103 if (pickler_write(self, header, len) < 0)
1104 return -1;
1105
1106 if (pickler_write(self, PyBytes_AS_STRING(obj), size) < 0)
1107 return -1;
1108
1109 if (memo_put(self, obj) < 0)
1110 return -1;
1111
1112 return 0;
1113 }
1114}
1115
1116/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
1117 backslash and newline characters to \uXXXX escapes. */
1118static PyObject *
1119raw_unicode_escape(const Py_UNICODE *s, Py_ssize_t size)
1120{
1121 PyObject *repr, *result;
1122 char *p;
1123 char *q;
1124
1125 static const char *hexdigits = "0123456789abcdef";
1126
1127#ifdef Py_UNICODE_WIDE
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001128 const Py_ssize_t expandsize = 10;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001129#else
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001130 const Py_ssize_t expandsize = 6;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001131#endif
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001132
1133 if (size > PY_SSIZE_T_MAX / expandsize)
1134 return PyErr_NoMemory();
1135
1136 repr = PyByteArray_FromStringAndSize(NULL, expandsize * size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001137 if (repr == NULL)
1138 return NULL;
1139 if (size == 0)
1140 goto done;
1141
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001142 p = q = PyByteArray_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001143 while (size-- > 0) {
1144 Py_UNICODE ch = *s++;
1145#ifdef Py_UNICODE_WIDE
1146 /* Map 32-bit characters to '\Uxxxxxxxx' */
1147 if (ch >= 0x10000) {
1148 *p++ = '\\';
1149 *p++ = 'U';
1150 *p++ = hexdigits[(ch >> 28) & 0xf];
1151 *p++ = hexdigits[(ch >> 24) & 0xf];
1152 *p++ = hexdigits[(ch >> 20) & 0xf];
1153 *p++ = hexdigits[(ch >> 16) & 0xf];
1154 *p++ = hexdigits[(ch >> 12) & 0xf];
1155 *p++ = hexdigits[(ch >> 8) & 0xf];
1156 *p++ = hexdigits[(ch >> 4) & 0xf];
1157 *p++ = hexdigits[ch & 15];
1158 }
1159 else
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001160#else
1161 /* Map UTF-16 surrogate pairs to '\U00xxxxxx' */
1162 if (ch >= 0xD800 && ch < 0xDC00) {
1163 Py_UNICODE ch2;
1164 Py_UCS4 ucs;
1165
1166 ch2 = *s++;
1167 size--;
1168 if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
1169 ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
1170 *p++ = '\\';
1171 *p++ = 'U';
1172 *p++ = hexdigits[(ucs >> 28) & 0xf];
1173 *p++ = hexdigits[(ucs >> 24) & 0xf];
1174 *p++ = hexdigits[(ucs >> 20) & 0xf];
1175 *p++ = hexdigits[(ucs >> 16) & 0xf];
1176 *p++ = hexdigits[(ucs >> 12) & 0xf];
1177 *p++ = hexdigits[(ucs >> 8) & 0xf];
1178 *p++ = hexdigits[(ucs >> 4) & 0xf];
1179 *p++ = hexdigits[ucs & 0xf];
1180 continue;
1181 }
1182 /* Fall through: isolated surrogates are copied as-is */
1183 s--;
1184 size++;
1185 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001186#endif
1187 /* Map 16-bit characters to '\uxxxx' */
1188 if (ch >= 256 || ch == '\\' || ch == '\n') {
1189 *p++ = '\\';
1190 *p++ = 'u';
1191 *p++ = hexdigits[(ch >> 12) & 0xf];
1192 *p++ = hexdigits[(ch >> 8) & 0xf];
1193 *p++ = hexdigits[(ch >> 4) & 0xf];
1194 *p++ = hexdigits[ch & 15];
1195 }
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001196 /* Copy everything else as-is */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001197 else
1198 *p++ = (char) ch;
1199 }
1200 size = p - q;
1201
1202 done:
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001203 result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(repr), size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001204 Py_DECREF(repr);
1205 return result;
1206}
1207
1208static int
1209save_unicode(PicklerObject *self, PyObject *obj)
1210{
1211 Py_ssize_t size;
1212 PyObject *encoded = NULL;
1213
1214 if (self->bin) {
1215 char pdata[5];
1216
1217 encoded = PyUnicode_AsUTF8String(obj);
1218 if (encoded == NULL)
1219 goto error;
1220
1221 size = PyBytes_GET_SIZE(encoded);
1222 if (size < 0 || size > 0xffffffffL)
1223 goto error; /* string too large */
1224
1225 pdata[0] = BINUNICODE;
1226 pdata[1] = (unsigned char)(size & 0xff);
1227 pdata[2] = (unsigned char)((size >> 8) & 0xff);
1228 pdata[3] = (unsigned char)((size >> 16) & 0xff);
1229 pdata[4] = (unsigned char)((size >> 24) & 0xff);
1230
1231 if (pickler_write(self, pdata, 5) < 0)
1232 goto error;
1233
1234 if (pickler_write(self, PyBytes_AS_STRING(encoded), size) < 0)
1235 goto error;
1236 }
1237 else {
1238 const char unicode_op = UNICODE;
1239
1240 encoded = raw_unicode_escape(PyUnicode_AS_UNICODE(obj),
1241 PyUnicode_GET_SIZE(obj));
1242 if (encoded == NULL)
1243 goto error;
1244
1245 if (pickler_write(self, &unicode_op, 1) < 0)
1246 goto error;
1247
1248 size = PyBytes_GET_SIZE(encoded);
1249 if (pickler_write(self, PyBytes_AS_STRING(encoded), size) < 0)
1250 goto error;
1251
1252 if (pickler_write(self, "\n", 1) < 0)
1253 goto error;
1254 }
1255 if (memo_put(self, obj) < 0)
1256 goto error;
1257
1258 Py_DECREF(encoded);
1259 return 0;
1260
1261 error:
1262 Py_XDECREF(encoded);
1263 return -1;
1264}
1265
1266/* A helper for save_tuple. Push the len elements in tuple t on the stack. */
1267static int
1268store_tuple_elements(PicklerObject *self, PyObject *t, int len)
1269{
1270 int i;
1271
1272 assert(PyTuple_Size(t) == len);
1273
1274 for (i = 0; i < len; i++) {
1275 PyObject *element = PyTuple_GET_ITEM(t, i);
1276
1277 if (element == NULL)
1278 return -1;
1279 if (save(self, element, 0) < 0)
1280 return -1;
1281 }
1282
1283 return 0;
1284}
1285
1286/* Tuples are ubiquitous in the pickle protocols, so many techniques are
1287 * used across protocols to minimize the space needed to pickle them.
1288 * Tuples are also the only builtin immutable type that can be recursive
1289 * (a tuple can be reached from itself), and that requires some subtle
1290 * magic so that it works in all cases. IOW, this is a long routine.
1291 */
1292static int
1293save_tuple(PicklerObject *self, PyObject *obj)
1294{
1295 PyObject *memo_key = NULL;
1296 int len, i;
1297 int status = 0;
1298
1299 const char mark_op = MARK;
1300 const char tuple_op = TUPLE;
1301 const char pop_op = POP;
1302 const char pop_mark_op = POP_MARK;
1303 const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
1304
1305 if ((len = PyTuple_Size(obj)) < 0)
1306 return -1;
1307
1308 if (len == 0) {
1309 char pdata[2];
1310
1311 if (self->proto) {
1312 pdata[0] = EMPTY_TUPLE;
1313 len = 1;
1314 }
1315 else {
1316 pdata[0] = MARK;
1317 pdata[1] = TUPLE;
1318 len = 2;
1319 }
1320 if (pickler_write(self, pdata, len) < 0)
1321 return -1;
1322 return 0;
1323 }
1324
1325 /* id(tuple) isn't in the memo now. If it shows up there after
1326 * saving the tuple elements, the tuple must be recursive, in
1327 * which case we'll pop everything we put on the stack, and fetch
1328 * its value from the memo.
1329 */
1330 memo_key = PyLong_FromVoidPtr(obj);
1331 if (memo_key == NULL)
1332 return -1;
1333
1334 if (len <= 3 && self->proto >= 2) {
1335 /* Use TUPLE{1,2,3} opcodes. */
1336 if (store_tuple_elements(self, obj, len) < 0)
1337 goto error;
1338
1339 if (PyDict_GetItem(self->memo, memo_key)) {
1340 /* pop the len elements */
1341 for (i = 0; i < len; i++)
1342 if (pickler_write(self, &pop_op, 1) < 0)
1343 goto error;
1344 /* fetch from memo */
1345 if (memo_get(self, memo_key) < 0)
1346 goto error;
1347
1348 Py_DECREF(memo_key);
1349 return 0;
1350 }
1351 else { /* Not recursive. */
1352 if (pickler_write(self, len2opcode + len, 1) < 0)
1353 goto error;
1354 }
1355 goto memoize;
1356 }
1357
1358 /* proto < 2 and len > 0, or proto >= 2 and len > 3.
1359 * Generate MARK e1 e2 ... TUPLE
1360 */
1361 if (pickler_write(self, &mark_op, 1) < 0)
1362 goto error;
1363
1364 if (store_tuple_elements(self, obj, len) < 0)
1365 goto error;
1366
1367 if (PyDict_GetItem(self->memo, memo_key)) {
1368 /* pop the stack stuff we pushed */
1369 if (self->bin) {
1370 if (pickler_write(self, &pop_mark_op, 1) < 0)
1371 goto error;
1372 }
1373 else {
1374 /* Note that we pop one more than len, to remove
1375 * the MARK too.
1376 */
1377 for (i = 0; i <= len; i++)
1378 if (pickler_write(self, &pop_op, 1) < 0)
1379 goto error;
1380 }
1381 /* fetch from memo */
1382 if (memo_get(self, memo_key) < 0)
1383 goto error;
1384
1385 Py_DECREF(memo_key);
1386 return 0;
1387 }
1388 else { /* Not recursive. */
1389 if (pickler_write(self, &tuple_op, 1) < 0)
1390 goto error;
1391 }
1392
1393 memoize:
1394 if (memo_put(self, obj) < 0)
1395 goto error;
1396
1397 if (0) {
1398 error:
1399 status = -1;
1400 }
1401
1402 Py_DECREF(memo_key);
1403 return status;
1404}
1405
1406/* iter is an iterator giving items, and we batch up chunks of
1407 * MARK item item ... item APPENDS
1408 * opcode sequences. Calling code should have arranged to first create an
1409 * empty list, or list-like object, for the APPENDS to operate on.
1410 * Returns 0 on success, <0 on error.
1411 */
1412static int
1413batch_list(PicklerObject *self, PyObject *iter)
1414{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001415 PyObject *obj = NULL;
1416 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001417 int i, n;
1418
1419 const char mark_op = MARK;
1420 const char append_op = APPEND;
1421 const char appends_op = APPENDS;
1422
1423 assert(iter != NULL);
1424
1425 /* XXX: I think this function could be made faster by avoiding the
1426 iterator interface and fetching objects directly from list using
1427 PyList_GET_ITEM.
1428 */
1429
1430 if (self->proto == 0) {
1431 /* APPENDS isn't available; do one at a time. */
1432 for (;;) {
1433 obj = PyIter_Next(iter);
1434 if (obj == NULL) {
1435 if (PyErr_Occurred())
1436 return -1;
1437 break;
1438 }
1439 i = save(self, obj, 0);
1440 Py_DECREF(obj);
1441 if (i < 0)
1442 return -1;
1443 if (pickler_write(self, &append_op, 1) < 0)
1444 return -1;
1445 }
1446 return 0;
1447 }
1448
1449 /* proto > 0: write in batches of BATCHSIZE. */
1450 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001451 /* Get first item */
1452 firstitem = PyIter_Next(iter);
1453 if (firstitem == NULL) {
1454 if (PyErr_Occurred())
1455 goto error;
1456
1457 /* nothing more to add */
1458 break;
1459 }
1460
1461 /* Try to get a second item */
1462 obj = PyIter_Next(iter);
1463 if (obj == NULL) {
1464 if (PyErr_Occurred())
1465 goto error;
1466
1467 /* Only one item to write */
1468 if (save(self, firstitem, 0) < 0)
1469 goto error;
1470 if (pickler_write(self, &append_op, 1) < 0)
1471 goto error;
1472 Py_CLEAR(firstitem);
1473 break;
1474 }
1475
1476 /* More than one item to write */
1477
1478 /* Pump out MARK, items, APPENDS. */
1479 if (pickler_write(self, &mark_op, 1) < 0)
1480 goto error;
1481
1482 if (save(self, firstitem, 0) < 0)
1483 goto error;
1484 Py_CLEAR(firstitem);
1485 n = 1;
1486
1487 /* Fetch and save up to BATCHSIZE items */
1488 while (obj) {
1489 if (save(self, obj, 0) < 0)
1490 goto error;
1491 Py_CLEAR(obj);
1492 n += 1;
1493
1494 if (n == BATCHSIZE)
1495 break;
1496
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001497 obj = PyIter_Next(iter);
1498 if (obj == NULL) {
1499 if (PyErr_Occurred())
1500 goto error;
1501 break;
1502 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001503 }
1504
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001505 if (pickler_write(self, &appends_op, 1) < 0)
1506 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001507
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001508 } while (n == BATCHSIZE);
1509 return 0;
1510
1511 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001512 Py_XDECREF(firstitem);
1513 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001514 return -1;
1515}
1516
1517static int
1518save_list(PicklerObject *self, PyObject *obj)
1519{
1520 PyObject *iter;
1521 char header[3];
1522 int len;
1523 int status = 0;
1524
1525 if (self->fast && !fast_save_enter(self, obj))
1526 goto error;
1527
1528 /* Create an empty list. */
1529 if (self->bin) {
1530 header[0] = EMPTY_LIST;
1531 len = 1;
1532 }
1533 else {
1534 header[0] = MARK;
1535 header[1] = LIST;
1536 len = 2;
1537 }
1538
1539 if (pickler_write(self, header, len) < 0)
1540 goto error;
1541
1542 /* Get list length, and bow out early if empty. */
1543 if ((len = PyList_Size(obj)) < 0)
1544 goto error;
1545
1546 if (memo_put(self, obj) < 0)
1547 goto error;
1548
1549 if (len != 0) {
1550 /* Save the list elements. */
1551 iter = PyObject_GetIter(obj);
1552 if (iter == NULL)
1553 goto error;
1554 status = batch_list(self, iter);
1555 Py_DECREF(iter);
1556 }
1557
1558 if (0) {
1559 error:
1560 status = -1;
1561 }
1562
1563 if (self->fast && !fast_save_leave(self, obj))
1564 status = -1;
1565
1566 return status;
1567}
1568
1569/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
1570 * MARK key value ... key value SETITEMS
1571 * opcode sequences. Calling code should have arranged to first create an
1572 * empty dict, or dict-like object, for the SETITEMS to operate on.
1573 * Returns 0 on success, <0 on error.
1574 *
1575 * This is very much like batch_list(). The difference between saving
1576 * elements directly, and picking apart two-tuples, is so long-winded at
1577 * the C level, though, that attempts to combine these routines were too
1578 * ugly to bear.
1579 */
1580static int
1581batch_dict(PicklerObject *self, PyObject *iter)
1582{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001583 PyObject *obj = NULL;
1584 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001585 int i, n;
1586
1587 const char mark_op = MARK;
1588 const char setitem_op = SETITEM;
1589 const char setitems_op = SETITEMS;
1590
1591 assert(iter != NULL);
1592
1593 if (self->proto == 0) {
1594 /* SETITEMS isn't available; do one at a time. */
1595 for (;;) {
1596 obj = PyIter_Next(iter);
1597 if (obj == NULL) {
1598 if (PyErr_Occurred())
1599 return -1;
1600 break;
1601 }
1602 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
1603 PyErr_SetString(PyExc_TypeError, "dict items "
1604 "iterator must return 2-tuples");
1605 return -1;
1606 }
1607 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
1608 if (i >= 0)
1609 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
1610 Py_DECREF(obj);
1611 if (i < 0)
1612 return -1;
1613 if (pickler_write(self, &setitem_op, 1) < 0)
1614 return -1;
1615 }
1616 return 0;
1617 }
1618
1619 /* proto > 0: write in batches of BATCHSIZE. */
1620 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001621 /* Get first item */
1622 firstitem = PyIter_Next(iter);
1623 if (firstitem == NULL) {
1624 if (PyErr_Occurred())
1625 goto error;
1626
1627 /* nothing more to add */
1628 break;
1629 }
1630 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
1631 PyErr_SetString(PyExc_TypeError, "dict items "
1632 "iterator must return 2-tuples");
1633 goto error;
1634 }
1635
1636 /* Try to get a second item */
1637 obj = PyIter_Next(iter);
1638 if (obj == NULL) {
1639 if (PyErr_Occurred())
1640 goto error;
1641
1642 /* Only one item to write */
1643 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
1644 goto error;
1645 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
1646 goto error;
1647 if (pickler_write(self, &setitem_op, 1) < 0)
1648 goto error;
1649 Py_CLEAR(firstitem);
1650 break;
1651 }
1652
1653 /* More than one item to write */
1654
1655 /* Pump out MARK, items, SETITEMS. */
1656 if (pickler_write(self, &mark_op, 1) < 0)
1657 goto error;
1658
1659 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
1660 goto error;
1661 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
1662 goto error;
1663 Py_CLEAR(firstitem);
1664 n = 1;
1665
1666 /* Fetch and save up to BATCHSIZE items */
1667 while (obj) {
1668 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
1669 PyErr_SetString(PyExc_TypeError, "dict items "
1670 "iterator must return 2-tuples");
1671 goto error;
1672 }
1673 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
1674 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
1675 goto error;
1676 Py_CLEAR(obj);
1677 n += 1;
1678
1679 if (n == BATCHSIZE)
1680 break;
1681
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001682 obj = PyIter_Next(iter);
1683 if (obj == NULL) {
1684 if (PyErr_Occurred())
1685 goto error;
1686 break;
1687 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001688 }
1689
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001690 if (pickler_write(self, &setitems_op, 1) < 0)
1691 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001692
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001693 } while (n == BATCHSIZE);
1694 return 0;
1695
1696 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001697 Py_XDECREF(firstitem);
1698 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001699 return -1;
1700}
1701
Collin Winter5c9b02d2009-05-25 05:43:30 +00001702/* This is a variant of batch_dict() above that specializes for dicts, with no
1703 * support for dict subclasses. Like batch_dict(), we batch up chunks of
1704 * MARK key value ... key value SETITEMS
1705 * opcode sequences. Calling code should have arranged to first create an
1706 * empty dict, or dict-like object, for the SETITEMS to operate on.
1707 * Returns 0 on success, -1 on error.
1708 *
1709 * Note that this currently doesn't work for protocol 0.
1710 */
1711static int
1712batch_dict_exact(PicklerObject *self, PyObject *obj)
1713{
1714 PyObject *key = NULL, *value = NULL;
1715 int i;
1716 Py_ssize_t dict_size, ppos = 0;
1717
Alexandre Vassalottif70b1292009-05-25 18:00:52 +00001718 const char mark_op = MARK;
1719 const char setitem_op = SETITEM;
1720 const char setitems_op = SETITEMS;
Collin Winter5c9b02d2009-05-25 05:43:30 +00001721
1722 assert(obj != NULL);
1723 assert(self->proto > 0);
1724
1725 dict_size = PyDict_Size(obj);
1726
1727 /* Special-case len(d) == 1 to save space. */
1728 if (dict_size == 1) {
1729 PyDict_Next(obj, &ppos, &key, &value);
1730 if (save(self, key, 0) < 0)
1731 return -1;
1732 if (save(self, value, 0) < 0)
1733 return -1;
Alexandre Vassalottif70b1292009-05-25 18:00:52 +00001734 if (pickler_write(self, &setitem_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00001735 return -1;
1736 return 0;
1737 }
1738
1739 /* Write in batches of BATCHSIZE. */
1740 do {
1741 i = 0;
1742 if (pickler_write(self, &mark_op, 1) < 0)
1743 return -1;
1744 while (PyDict_Next(obj, &ppos, &key, &value)) {
1745 if (save(self, key, 0) < 0)
1746 return -1;
1747 if (save(self, value, 0) < 0)
1748 return -1;
1749 if (++i == BATCHSIZE)
1750 break;
1751 }
Alexandre Vassalottif70b1292009-05-25 18:00:52 +00001752 if (pickler_write(self, &setitems_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00001753 return -1;
1754 if (PyDict_Size(obj) != dict_size) {
1755 PyErr_Format(
1756 PyExc_RuntimeError,
1757 "dictionary changed size during iteration");
1758 return -1;
1759 }
1760
1761 } while (i == BATCHSIZE);
1762 return 0;
1763}
1764
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001765static int
1766save_dict(PicklerObject *self, PyObject *obj)
1767{
1768 PyObject *items, *iter;
1769 char header[3];
1770 int len;
1771 int status = 0;
1772
1773 if (self->fast && !fast_save_enter(self, obj))
1774 goto error;
1775
1776 /* Create an empty dict. */
1777 if (self->bin) {
1778 header[0] = EMPTY_DICT;
1779 len = 1;
1780 }
1781 else {
1782 header[0] = MARK;
1783 header[1] = DICT;
1784 len = 2;
1785 }
1786
1787 if (pickler_write(self, header, len) < 0)
1788 goto error;
1789
1790 /* Get dict size, and bow out early if empty. */
1791 if ((len = PyDict_Size(obj)) < 0)
1792 goto error;
1793
1794 if (memo_put(self, obj) < 0)
1795 goto error;
1796
1797 if (len != 0) {
1798 /* Save the dict items. */
Collin Winter5c9b02d2009-05-25 05:43:30 +00001799 if (PyDict_CheckExact(obj) && self->proto > 0) {
1800 /* We can take certain shortcuts if we know this is a dict and
1801 not a dict subclass. */
1802 if (Py_EnterRecursiveCall(" while pickling an object") == 0) {
1803 status = batch_dict_exact(self, obj);
1804 Py_LeaveRecursiveCall();
1805 }
1806 } else {
1807 items = PyObject_CallMethod(obj, "items", "()");
1808 if (items == NULL)
1809 goto error;
1810 iter = PyObject_GetIter(items);
1811 Py_DECREF(items);
1812 if (iter == NULL)
1813 goto error;
1814 status = batch_dict(self, iter);
1815 Py_DECREF(iter);
1816 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001817 }
1818
1819 if (0) {
1820 error:
1821 status = -1;
1822 }
1823
1824 if (self->fast && !fast_save_leave(self, obj))
1825 status = -1;
1826
1827 return status;
1828}
1829
/* Pickle obj by reference, i.e. as a (module name, global name) pair.
 *
 * name, when non-NULL, is used as the global name; otherwise obj.__name__
 * is looked up.  The module is found with whichmodule(), imported, and the
 * attribute is checked to be the very same object as obj; PicklingError is
 * raised otherwise.  For protocol >= 2 an EXT1/EXT2/EXT4 opcode is written
 * when the pair is found in extension_registry; in every other case a
 * GLOBAL opcode followed by the two newline-terminated names is written and
 * obj is memoized.
 *
 * Returns 0 on success, -1 on error.
 */
static int
save_global(PicklerObject *self, PyObject *obj, PyObject *name)
{
    /* Interned "__name__", created on first use and kept for later calls. */
    static PyObject *name_str = NULL;
    PyObject *global_name = NULL;
    PyObject *module_name = NULL;
    PyObject *module = NULL;
    PyObject *cls;
    int status = 0;

    const char global_op = GLOBAL;

    if (name_str == NULL) {
        name_str = PyUnicode_InternFromString("__name__");
        if (name_str == NULL)
            goto error;
    }

    /* global_name is an owned reference on both branches. */
    if (name) {
        global_name = name;
        Py_INCREF(global_name);
    }
    else {
        global_name = PyObject_GetAttr(obj, name_str);
        if (global_name == NULL)
            goto error;
    }

    module_name = whichmodule(obj, global_name);
    if (module_name == NULL)
        goto error;

    /* XXX: Change to use the import C API directly with level=0 to disallow
       relative imports.

       XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
       builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
       custom import functions (IMHO, this would be a nice security
       feature). The import C API would need to be extended to support the
       extra parameters of __import__ to fix that. */
    module = PyImport_Import(module_name);
    if (module == NULL) {
        PyErr_Format(PicklingError,
                     "Can't pickle %R: import of module %R failed",
                     obj, module_name);
        goto error;
    }
    /* Verify that module.global_name really resolves to obj itself. */
    cls = PyObject_GetAttr(module, global_name);
    if (cls == NULL) {
        PyErr_Format(PicklingError,
                     "Can't pickle %R: attribute lookup %S.%S failed",
                     obj, module_name, global_name);
        goto error;
    }
    if (cls != obj) {
        Py_DECREF(cls);
        PyErr_Format(PicklingError,
                     "Can't pickle %R: it's not the same object as %S.%S",
                     obj, module_name, global_name);
        goto error;
    }
    Py_DECREF(cls);

    if (self->proto >= 2) {
        /* See whether this is in the extension registry, and if
         * so generate an EXT opcode.
         */
        PyObject *code_obj;     /* extension code as Python object */
        long code;              /* extension code as C value */
        char pdata[5];
        int n;

        /* NOTE(review): two_tuple and extension_registry are defined
           outside this chunk.  PyTuple_SET_ITEM does not take new
           references here, so two_tuple appears to be a scratch lookup
           key that does not own its items -- confirm against its
           definition. */
        PyTuple_SET_ITEM(two_tuple, 0, module_name);
        PyTuple_SET_ITEM(two_tuple, 1, global_name);
        code_obj = PyDict_GetItem(extension_registry, two_tuple);
        /* The object is not registered in the extension registry.
           This is the most likely code path. */
        if (code_obj == NULL)
            goto gen_global;

        /* XXX: pickle.py checks neither the type nor the range
           of the value returned by the extension_registry. It should for
           consistency. */

        /* Verify code_obj has the right type and value. */
        if (!PyLong_Check(code_obj)) {
            PyErr_Format(PicklingError,
                         "Can't pickle %R: extension code %R isn't an integer",
                         obj, code_obj);
            goto error;
        }
        code = PyLong_AS_LONG(code_obj);
        if (code <= 0 || code > 0x7fffffffL) {
            PyErr_Format(PicklingError,
                         "Can't pickle %R: extension code %ld is out of range",
                         obj, code);
            goto error;
        }

        /* Generate an EXT opcode; the code is written little-endian in
           the smallest of 1, 2 or 4 bytes that can hold it. */
        if (code <= 0xff) {
            pdata[0] = EXT1;
            pdata[1] = (unsigned char)code;
            n = 2;
        }
        else if (code <= 0xffff) {
            pdata[0] = EXT2;
            pdata[1] = (unsigned char)(code & 0xff);
            pdata[2] = (unsigned char)((code >> 8) & 0xff);
            n = 3;
        }
        else {
            pdata[0] = EXT4;
            pdata[1] = (unsigned char)(code & 0xff);
            pdata[2] = (unsigned char)((code >> 8) & 0xff);
            pdata[3] = (unsigned char)((code >> 16) & 0xff);
            pdata[4] = (unsigned char)((code >> 24) & 0xff);
            n = 5;
        }

        if (pickler_write(self, pdata, n) < 0)
            goto error;
    }
    else {
        /* Generate a normal global opcode if we are using a pickle
           protocol < 2, or if the object is not registered in the
           extension registry (in which case the code above jumps to
           gen_global). */
        PyObject *encoded;
        PyObject *(*unicode_encoder)(PyObject *);

  gen_global:
        if (pickler_write(self, &global_op, 1) < 0)
            goto error;

        /* Since Python 3.0 now supports non-ASCII identifiers, we encode both
           the module name and the global name using UTF-8.  We do so only when
           we are using pickle protocol version 3 or newer; older protocols
           use ASCII.  This is to ensure compatibility with older Unpicklers
           running on Python 2.x. */
        if (self->proto >= 3) {
            unicode_encoder = PyUnicode_AsUTF8String;
        }
        else {
            unicode_encoder = PyUnicode_AsASCIIString;
        }

        /* Save the name of the module. */
        encoded = unicode_encoder(module_name);
        if (encoded == NULL) {
            /* Turn an encoding failure into a PicklingError; any other
               exception is left untouched. */
            if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
                PyErr_Format(PicklingError,
                             "can't pickle module identifier '%S' using "
                             "pickle protocol %i", module_name, self->proto);
            goto error;
        }
        if (pickler_write(self, PyBytes_AS_STRING(encoded),
                          PyBytes_GET_SIZE(encoded)) < 0) {
            Py_DECREF(encoded);
            goto error;
        }
        Py_DECREF(encoded);
        if(pickler_write(self, "\n", 1) < 0)
            goto error;

        /* Save the name of the global. */
        encoded = unicode_encoder(global_name);
        if (encoded == NULL) {
            if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
                PyErr_Format(PicklingError,
                             "can't pickle global identifier '%S' using "
                             "pickle protocol %i", global_name, self->proto);
            goto error;
        }
        if (pickler_write(self, PyBytes_AS_STRING(encoded),
                          PyBytes_GET_SIZE(encoded)) < 0) {
            Py_DECREF(encoded);
            goto error;
        }
        Py_DECREF(encoded);
        if(pickler_write(self, "\n", 1) < 0)
            goto error;

        /* Memoize the object. */
        if (memo_put(self, obj) < 0)
            goto error;
    }

    /* Success falls through past the error label; failures jump into it. */
    if (0) {
  error:
        status = -1;
    }
    Py_XDECREF(module_name);
    Py_XDECREF(global_name);
    Py_XDECREF(module);

    return status;
}
2026
2027static int
2028save_pers(PicklerObject *self, PyObject *obj, PyObject *func)
2029{
2030 PyObject *pid = NULL;
2031 int status = 0;
2032
2033 const char persid_op = PERSID;
2034 const char binpersid_op = BINPERSID;
2035
2036 Py_INCREF(obj);
2037 pid = pickler_call(self, func, obj);
2038 if (pid == NULL)
2039 return -1;
2040
2041 if (pid != Py_None) {
2042 if (self->bin) {
2043 if (save(self, pid, 1) < 0 ||
2044 pickler_write(self, &binpersid_op, 1) < 0)
2045 goto error;
2046 }
2047 else {
2048 PyObject *pid_str = NULL;
2049 char *pid_ascii_bytes;
2050 Py_ssize_t size;
2051
2052 pid_str = PyObject_Str(pid);
2053 if (pid_str == NULL)
2054 goto error;
2055
2056 /* XXX: Should it check whether the persistent id only contains
2057 ASCII characters? And what if the pid contains embedded
2058 newlines? */
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00002059 pid_ascii_bytes = _PyUnicode_AsStringAndSize(pid_str, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002060 Py_DECREF(pid_str);
2061 if (pid_ascii_bytes == NULL)
2062 goto error;
2063
2064 if (pickler_write(self, &persid_op, 1) < 0 ||
2065 pickler_write(self, pid_ascii_bytes, size) < 0 ||
2066 pickler_write(self, "\n", 1) < 0)
2067 goto error;
2068 }
2069 status = 1;
2070 }
2071
2072 if (0) {
2073 error:
2074 status = -1;
2075 }
2076 Py_XDECREF(pid);
2077
2078 return status;
2079}
2080
2081/* We're saving obj, and args is the 2-thru-5 tuple returned by the
2082 * appropriate __reduce__ method for obj.
2083 */
2084static int
2085save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
2086{
2087 PyObject *callable;
2088 PyObject *argtup;
2089 PyObject *state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002090 PyObject *listitems = Py_None;
2091 PyObject *dictitems = Py_None;
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002092 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002093
2094 int use_newobj = self->proto >= 2;
2095
2096 const char reduce_op = REDUCE;
2097 const char build_op = BUILD;
2098 const char newobj_op = NEWOBJ;
2099
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002100 size = PyTuple_Size(args);
2101 if (size < 2 || size > 5) {
2102 PyErr_SetString(PicklingError, "tuple returned by "
2103 "__reduce__ must contain 2 through 5 elements");
2104 return -1;
2105 }
2106
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002107 if (!PyArg_UnpackTuple(args, "save_reduce", 2, 5,
2108 &callable, &argtup, &state, &listitems, &dictitems))
2109 return -1;
2110
2111 if (!PyCallable_Check(callable)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002112 PyErr_SetString(PicklingError, "first item of the tuple "
2113 "returned by __reduce__ must be callable");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002114 return -1;
2115 }
2116 if (!PyTuple_Check(argtup)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002117 PyErr_SetString(PicklingError, "second item of the tuple "
2118 "returned by __reduce__ must be a tuple");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002119 return -1;
2120 }
2121
2122 if (state == Py_None)
2123 state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002124
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002125 if (listitems == Py_None)
2126 listitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002127 else if (!PyIter_Check(listitems)) {
2128 PyErr_Format(PicklingError, "Fourth element of tuple"
2129 "returned by __reduce__ must be an iterator, not %s",
2130 Py_TYPE(listitems)->tp_name);
2131 return -1;
2132 }
2133
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002134 if (dictitems == Py_None)
2135 dictitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002136 else if (!PyIter_Check(dictitems)) {
2137 PyErr_Format(PicklingError, "Fifth element of tuple"
2138 "returned by __reduce__ must be an iterator, not %s",
2139 Py_TYPE(dictitems)->tp_name);
2140 return -1;
2141 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002142
2143 /* Protocol 2 special case: if callable's name is __newobj__, use
2144 NEWOBJ. */
2145 if (use_newobj) {
2146 static PyObject *newobj_str = NULL;
2147 PyObject *name_str;
2148
2149 if (newobj_str == NULL) {
2150 newobj_str = PyUnicode_InternFromString("__newobj__");
2151 }
2152
2153 name_str = PyObject_GetAttrString(callable, "__name__");
2154 if (name_str == NULL) {
2155 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2156 PyErr_Clear();
2157 else
2158 return -1;
2159 use_newobj = 0;
2160 }
2161 else {
2162 use_newobj = PyUnicode_Check(name_str) &&
2163 PyUnicode_Compare(name_str, newobj_str) == 0;
2164 Py_DECREF(name_str);
2165 }
2166 }
2167 if (use_newobj) {
2168 PyObject *cls;
2169 PyObject *newargtup;
2170 PyObject *obj_class;
2171 int p;
2172
2173 /* Sanity checks. */
2174 if (Py_SIZE(argtup) < 1) {
2175 PyErr_SetString(PicklingError, "__newobj__ arglist is empty");
2176 return -1;
2177 }
2178
2179 cls = PyTuple_GET_ITEM(argtup, 0);
2180 if (!PyObject_HasAttrString(cls, "__new__")) {
2181 PyErr_SetString(PicklingError, "args[0] from "
2182 "__newobj__ args has no __new__");
2183 return -1;
2184 }
2185
2186 if (obj != NULL) {
2187 obj_class = PyObject_GetAttrString(obj, "__class__");
2188 if (obj_class == NULL) {
2189 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2190 PyErr_Clear();
2191 else
2192 return -1;
2193 }
2194 p = obj_class != cls; /* true iff a problem */
2195 Py_DECREF(obj_class);
2196 if (p) {
2197 PyErr_SetString(PicklingError, "args[0] from "
2198 "__newobj__ args has the wrong class");
2199 return -1;
2200 }
2201 }
2202 /* XXX: These calls save() are prone to infinite recursion. Imagine
2203 what happen if the value returned by the __reduce__() method of
2204 some extension type contains another object of the same type. Ouch!
2205
2206 Here is a quick example, that I ran into, to illustrate what I
2207 mean:
2208
2209 >>> import pickle, copyreg
2210 >>> copyreg.dispatch_table.pop(complex)
2211 >>> pickle.dumps(1+2j)
2212 Traceback (most recent call last):
2213 ...
2214 RuntimeError: maximum recursion depth exceeded
2215
2216 Removing the complex class from copyreg.dispatch_table made the
2217 __reduce_ex__() method emit another complex object:
2218
2219 >>> (1+1j).__reduce_ex__(2)
2220 (<function __newobj__ at 0xb7b71c3c>,
2221 (<class 'complex'>, (1+1j)), None, None, None)
2222
2223 Thus when save() was called on newargstup (the 2nd item) recursion
2224 ensued. Of course, the bug was in the complex class which had a
2225 broken __getnewargs__() that emitted another complex object. But,
2226 the point, here, is it is quite easy to end up with a broken reduce
2227 function. */
2228
2229 /* Save the class and its __new__ arguments. */
2230 if (save(self, cls, 0) < 0)
2231 return -1;
2232
2233 newargtup = PyTuple_GetSlice(argtup, 1, Py_SIZE(argtup));
2234 if (newargtup == NULL)
2235 return -1;
2236
2237 p = save(self, newargtup, 0);
2238 Py_DECREF(newargtup);
2239 if (p < 0)
2240 return -1;
2241
2242 /* Add NEWOBJ opcode. */
2243 if (pickler_write(self, &newobj_op, 1) < 0)
2244 return -1;
2245 }
2246 else { /* Not using NEWOBJ. */
2247 if (save(self, callable, 0) < 0 ||
2248 save(self, argtup, 0) < 0 ||
2249 pickler_write(self, &reduce_op, 1) < 0)
2250 return -1;
2251 }
2252
2253 /* obj can be NULL when save_reduce() is used directly. A NULL obj means
2254 the caller do not want to memoize the object. Not particularly useful,
2255 but that is to mimic the behavior save_reduce() in pickle.py when
2256 obj is None. */
2257 if (obj && memo_put(self, obj) < 0)
2258 return -1;
2259
2260 if (listitems && batch_list(self, listitems) < 0)
2261 return -1;
2262
2263 if (dictitems && batch_dict(self, dictitems) < 0)
2264 return -1;
2265
2266 if (state) {
2267 if (save(self, state, 0) < 0 ||
2268 pickler_write(self, &build_op, 1) < 0)
2269 return -1;
2270 }
2271
2272 return 0;
2273}
2274
2275static int
2276save(PicklerObject *self, PyObject *obj, int pers_save)
2277{
2278 PyTypeObject *type;
2279 PyObject *reduce_func = NULL;
2280 PyObject *reduce_value = NULL;
2281 PyObject *memo_key = NULL;
2282 int status = 0;
2283
Alexandre Vassalottidff18342008-07-13 18:48:30 +00002284 if (Py_EnterRecursiveCall(" while pickling an object") < 0)
2285 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002286
2287 /* The extra pers_save argument is necessary to avoid calling save_pers()
2288 on its returned object. */
2289 if (!pers_save && self->pers_func) {
2290 /* save_pers() returns:
2291 -1 to signal an error;
2292 0 if it did nothing successfully;
2293 1 if a persistent id was saved.
2294 */
2295 if ((status = save_pers(self, obj, self->pers_func)) != 0)
2296 goto done;
2297 }
2298
2299 type = Py_TYPE(obj);
2300
2301 /* XXX: The old cPickle had an optimization that used switch-case
2302 statement dispatching on the first letter of the type name. It was
2303 probably not a bad idea after all. If benchmarks shows that particular
2304 optimization had some real benefits, it would be nice to add it
2305 back. */
2306
2307 /* Atom types; these aren't memoized, so don't check the memo. */
2308
2309 if (obj == Py_None) {
2310 status = save_none(self, obj);
2311 goto done;
2312 }
2313 else if (obj == Py_False || obj == Py_True) {
2314 status = save_bool(self, obj);
2315 goto done;
2316 }
2317 else if (type == &PyLong_Type) {
2318 status = save_long(self, obj);
2319 goto done;
2320 }
2321 else if (type == &PyFloat_Type) {
2322 status = save_float(self, obj);
2323 goto done;
2324 }
2325
2326 /* Check the memo to see if it has the object. If so, generate
2327 a GET (or BINGET) opcode, instead of pickling the object
2328 once again. */
2329 memo_key = PyLong_FromVoidPtr(obj);
2330 if (memo_key == NULL)
2331 goto error;
2332 if (PyDict_GetItem(self->memo, memo_key)) {
2333 if (memo_get(self, memo_key) < 0)
2334 goto error;
2335 goto done;
2336 }
2337
2338 if (type == &PyBytes_Type) {
2339 status = save_bytes(self, obj);
2340 goto done;
2341 }
2342 else if (type == &PyUnicode_Type) {
2343 status = save_unicode(self, obj);
2344 goto done;
2345 }
2346 else if (type == &PyDict_Type) {
2347 status = save_dict(self, obj);
2348 goto done;
2349 }
2350 else if (type == &PyList_Type) {
2351 status = save_list(self, obj);
2352 goto done;
2353 }
2354 else if (type == &PyTuple_Type) {
2355 status = save_tuple(self, obj);
2356 goto done;
2357 }
2358 else if (type == &PyType_Type) {
2359 status = save_global(self, obj, NULL);
2360 goto done;
2361 }
2362 else if (type == &PyFunction_Type) {
2363 status = save_global(self, obj, NULL);
2364 if (status < 0 && PyErr_ExceptionMatches(PickleError)) {
2365 /* fall back to reduce */
2366 PyErr_Clear();
2367 }
2368 else {
2369 goto done;
2370 }
2371 }
2372 else if (type == &PyCFunction_Type) {
2373 status = save_global(self, obj, NULL);
2374 goto done;
2375 }
2376 else if (PyType_IsSubtype(type, &PyType_Type)) {
2377 status = save_global(self, obj, NULL);
2378 goto done;
2379 }
2380
2381 /* XXX: This part needs some unit tests. */
2382
2383 /* Get a reduction callable, and call it. This may come from
2384 * copyreg.dispatch_table, the object's __reduce_ex__ method,
2385 * or the object's __reduce__ method.
2386 */
2387 reduce_func = PyDict_GetItem(dispatch_table, (PyObject *)type);
2388 if (reduce_func != NULL) {
2389 /* Here, the reference count of the reduce_func object returned by
2390 PyDict_GetItem needs to be increased to be consistent with the one
2391 returned by PyObject_GetAttr. This is allow us to blindly DECREF
2392 reduce_func at the end of the save() routine.
2393 */
2394 Py_INCREF(reduce_func);
2395 Py_INCREF(obj);
2396 reduce_value = pickler_call(self, reduce_func, obj);
2397 }
2398 else {
2399 static PyObject *reduce_str = NULL;
2400 static PyObject *reduce_ex_str = NULL;
2401
2402 /* Cache the name of the reduce methods. */
2403 if (reduce_str == NULL) {
2404 reduce_str = PyUnicode_InternFromString("__reduce__");
2405 if (reduce_str == NULL)
2406 goto error;
2407 reduce_ex_str = PyUnicode_InternFromString("__reduce_ex__");
2408 if (reduce_ex_str == NULL)
2409 goto error;
2410 }
2411
2412 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
2413 automatically defined as __reduce__. While this is convenient, this
2414 make it impossible to know which method was actually called. Of
2415 course, this is not a big deal. But still, it would be nice to let
2416 the user know which method was called when something go
2417 wrong. Incidentally, this means if __reduce_ex__ is not defined, we
2418 don't actually have to check for a __reduce__ method. */
2419
2420 /* Check for a __reduce_ex__ method. */
2421 reduce_func = PyObject_GetAttr(obj, reduce_ex_str);
2422 if (reduce_func != NULL) {
2423 PyObject *proto;
2424 proto = PyLong_FromLong(self->proto);
2425 if (proto != NULL) {
2426 reduce_value = pickler_call(self, reduce_func, proto);
2427 }
2428 }
2429 else {
2430 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2431 PyErr_Clear();
2432 else
2433 goto error;
2434 /* Check for a __reduce__ method. */
2435 reduce_func = PyObject_GetAttr(obj, reduce_str);
2436 if (reduce_func != NULL) {
2437 reduce_value = PyObject_Call(reduce_func, empty_tuple, NULL);
2438 }
2439 else {
2440 PyErr_Format(PicklingError, "can't pickle '%.200s' object: %R",
2441 type->tp_name, obj);
2442 goto error;
2443 }
2444 }
2445 }
2446
2447 if (reduce_value == NULL)
2448 goto error;
2449
2450 if (PyUnicode_Check(reduce_value)) {
2451 status = save_global(self, obj, reduce_value);
2452 goto done;
2453 }
2454
2455 if (!PyTuple_Check(reduce_value)) {
2456 PyErr_SetString(PicklingError,
2457 "__reduce__ must return a string or tuple");
2458 goto error;
2459 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002460
2461 status = save_reduce(self, reduce_value, obj);
2462
2463 if (0) {
2464 error:
2465 status = -1;
2466 }
2467 done:
Alexandre Vassalottidff18342008-07-13 18:48:30 +00002468 Py_LeaveRecursiveCall();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002469 Py_XDECREF(memo_key);
2470 Py_XDECREF(reduce_func);
2471 Py_XDECREF(reduce_value);
2472
2473 return status;
2474}
2475
2476static int
2477dump(PicklerObject *self, PyObject *obj)
2478{
2479 const char stop_op = STOP;
2480
2481 if (self->proto >= 2) {
2482 char header[2];
2483
2484 header[0] = PROTO;
2485 assert(self->proto >= 0 && self->proto < 256);
2486 header[1] = (unsigned char)self->proto;
2487 if (pickler_write(self, header, 2) < 0)
2488 return -1;
2489 }
2490
2491 if (save(self, obj, 0) < 0 ||
2492 pickler_write(self, &stop_op, 1) < 0 ||
2493 pickler_write(self, NULL, 0) < 0)
2494 return -1;
2495
2496 return 0;
2497}
2498
2499PyDoc_STRVAR(Pickler_clear_memo_doc,
2500"clear_memo() -> None. Clears the pickler's \"memo\"."
2501"\n"
2502"The memo is the data structure that remembers which objects the\n"
2503"pickler has already seen, so that shared or recursive objects are\n"
2504"pickled by reference and not by value. This method is useful when\n"
2505"re-using picklers.");
2506
2507static PyObject *
2508Pickler_clear_memo(PicklerObject *self)
2509{
2510 if (self->memo)
2511 PyDict_Clear(self->memo);
2512
2513 Py_RETURN_NONE;
2514}
2515
2516PyDoc_STRVAR(Pickler_dump_doc,
2517"dump(obj) -> None. Write a pickled representation of obj to the open file.");
2518
2519static PyObject *
2520Pickler_dump(PicklerObject *self, PyObject *args)
2521{
2522 PyObject *obj;
2523
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00002524 /* Check whether the Pickler was initialized correctly (issue3664).
2525 Developers often forget to call __init__() in their subclasses, which
2526 would trigger a segfault without this check. */
2527 if (self->write == NULL) {
2528 PyErr_Format(PicklingError,
2529 "Pickler.__init__() was not called by %s.__init__()",
2530 Py_TYPE(self)->tp_name);
2531 return NULL;
2532 }
2533
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002534 if (!PyArg_ParseTuple(args, "O:dump", &obj))
2535 return NULL;
2536
2537 if (dump(self, obj) < 0)
2538 return NULL;
2539
2540 Py_RETURN_NONE;
2541}
2542
2543static struct PyMethodDef Pickler_methods[] = {
2544 {"dump", (PyCFunction)Pickler_dump, METH_VARARGS,
2545 Pickler_dump_doc},
2546 {"clear_memo", (PyCFunction)Pickler_clear_memo, METH_NOARGS,
2547 Pickler_clear_memo_doc},
2548 {NULL, NULL} /* sentinel */
2549};
2550
2551static void
2552Pickler_dealloc(PicklerObject *self)
2553{
2554 PyObject_GC_UnTrack(self);
2555
2556 Py_XDECREF(self->write);
2557 Py_XDECREF(self->memo);
2558 Py_XDECREF(self->pers_func);
2559 Py_XDECREF(self->arg);
2560 Py_XDECREF(self->fast_memo);
2561
2562 PyMem_Free(self->write_buf);
2563
2564 Py_TYPE(self)->tp_free((PyObject *)self);
2565}
2566
2567static int
2568Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
2569{
2570 Py_VISIT(self->write);
2571 Py_VISIT(self->memo);
2572 Py_VISIT(self->pers_func);
2573 Py_VISIT(self->arg);
2574 Py_VISIT(self->fast_memo);
2575 return 0;
2576}
2577
2578static int
2579Pickler_clear(PicklerObject *self)
2580{
2581 Py_CLEAR(self->write);
2582 Py_CLEAR(self->memo);
2583 Py_CLEAR(self->pers_func);
2584 Py_CLEAR(self->arg);
2585 Py_CLEAR(self->fast_memo);
2586
2587 PyMem_Free(self->write_buf);
2588 self->write_buf = NULL;
2589
2590 return 0;
2591}
2592
2593PyDoc_STRVAR(Pickler_doc,
2594"Pickler(file, protocol=None)"
2595"\n"
2596"This takes a binary file for writing a pickle data stream.\n"
2597"\n"
2598"The optional protocol argument tells the pickler to use the\n"
2599"given protocol; supported protocols are 0, 1, 2, 3. The default\n"
2600"protocol is 3; a backward-incompatible protocol designed for\n"
2601"Python 3.0.\n"
2602"\n"
2603"Specifying a negative protocol version selects the highest\n"
2604"protocol version supported. The higher the protocol used, the\n"
2605"more recent the version of Python needed to read the pickle\n"
2606"produced.\n"
2607"\n"
2608"The file argument must have a write() method that accepts a single\n"
2609"bytes argument. It can thus be a file object opened for binary\n"
2610"writing, a io.BytesIO instance, or any other custom object that\n"
2611"meets this interface.\n");
2612
2613static int
2614Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
2615{
2616 static char *kwlist[] = {"file", "protocol", 0};
2617 PyObject *file;
2618 PyObject *proto_obj = NULL;
2619 long proto = 0;
2620
2621 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:Pickler",
2622 kwlist, &file, &proto_obj))
2623 return -1;
2624
2625 /* In case of multiple __init__() calls, clear previous content. */
2626 if (self->write != NULL)
2627 (void)Pickler_clear(self);
2628
2629 if (proto_obj == NULL || proto_obj == Py_None)
2630 proto = DEFAULT_PROTOCOL;
2631 else
2632 proto = PyLong_AsLong(proto_obj);
2633
2634 if (proto < 0)
2635 proto = HIGHEST_PROTOCOL;
2636 if (proto > HIGHEST_PROTOCOL) {
2637 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
2638 HIGHEST_PROTOCOL);
2639 return -1;
2640 }
2641
2642 self->proto = proto;
2643 self->bin = proto > 0;
2644 self->arg = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002645 self->fast = 0;
2646 self->fast_nesting = 0;
2647 self->fast_memo = NULL;
2648
2649 if (!PyObject_HasAttrString(file, "write")) {
2650 PyErr_SetString(PyExc_TypeError,
2651 "file must have a 'write' attribute");
2652 return -1;
2653 }
2654 self->write = PyObject_GetAttrString(file, "write");
2655 if (self->write == NULL)
2656 return -1;
2657 self->buf_size = 0;
2658 self->write_buf = (char *)PyMem_Malloc(WRITE_BUF_SIZE);
2659 if (self->write_buf == NULL) {
2660 PyErr_NoMemory();
2661 return -1;
2662 }
2663 self->pers_func = NULL;
2664 if (PyObject_HasAttrString((PyObject *)self, "persistent_id")) {
2665 self->pers_func = PyObject_GetAttrString((PyObject *)self,
2666 "persistent_id");
2667 if (self->pers_func == NULL)
2668 return -1;
2669 }
2670 self->memo = PyDict_New();
2671 if (self->memo == NULL)
2672 return -1;
2673
2674 return 0;
2675}
2676
2677static PyObject *
2678Pickler_get_memo(PicklerObject *self)
2679{
2680 if (self->memo == NULL)
2681 PyErr_SetString(PyExc_AttributeError, "memo");
2682 else
2683 Py_INCREF(self->memo);
2684 return self->memo;
2685}
2686
2687static int
2688Pickler_set_memo(PicklerObject *self, PyObject *value)
2689{
2690 PyObject *tmp;
2691
2692 if (value == NULL) {
2693 PyErr_SetString(PyExc_TypeError,
2694 "attribute deletion is not supported");
2695 return -1;
2696 }
2697 if (!PyDict_Check(value)) {
2698 PyErr_SetString(PyExc_TypeError, "memo must be a dictionary");
2699 return -1;
2700 }
2701
2702 tmp = self->memo;
2703 Py_INCREF(value);
2704 self->memo = value;
2705 Py_XDECREF(tmp);
2706
2707 return 0;
2708}
2709
2710static PyObject *
2711Pickler_get_persid(PicklerObject *self)
2712{
2713 if (self->pers_func == NULL)
2714 PyErr_SetString(PyExc_AttributeError, "persistent_id");
2715 else
2716 Py_INCREF(self->pers_func);
2717 return self->pers_func;
2718}
2719
2720static int
2721Pickler_set_persid(PicklerObject *self, PyObject *value)
2722{
2723 PyObject *tmp;
2724
2725 if (value == NULL) {
2726 PyErr_SetString(PyExc_TypeError,
2727 "attribute deletion is not supported");
2728 return -1;
2729 }
2730 if (!PyCallable_Check(value)) {
2731 PyErr_SetString(PyExc_TypeError,
2732 "persistent_id must be a callable taking one argument");
2733 return -1;
2734 }
2735
2736 tmp = self->pers_func;
2737 Py_INCREF(value);
2738 self->pers_func = value;
2739 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
2740
2741 return 0;
2742}
2743
2744static PyMemberDef Pickler_members[] = {
2745 {"bin", T_INT, offsetof(PicklerObject, bin)},
2746 {"fast", T_INT, offsetof(PicklerObject, fast)},
2747 {NULL}
2748};
2749
2750static PyGetSetDef Pickler_getsets[] = {
2751 {"memo", (getter)Pickler_get_memo,
2752 (setter)Pickler_set_memo},
2753 {"persistent_id", (getter)Pickler_get_persid,
2754 (setter)Pickler_set_persid},
2755 {NULL}
2756};
2757
2758static PyTypeObject Pickler_Type = {
2759 PyVarObject_HEAD_INIT(NULL, 0)
2760 "_pickle.Pickler" , /*tp_name*/
2761 sizeof(PicklerObject), /*tp_basicsize*/
2762 0, /*tp_itemsize*/
2763 (destructor)Pickler_dealloc, /*tp_dealloc*/
2764 0, /*tp_print*/
2765 0, /*tp_getattr*/
2766 0, /*tp_setattr*/
Mark Dickinsone94c6792009-02-02 20:36:42 +00002767 0, /*tp_reserved*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002768 0, /*tp_repr*/
2769 0, /*tp_as_number*/
2770 0, /*tp_as_sequence*/
2771 0, /*tp_as_mapping*/
2772 0, /*tp_hash*/
2773 0, /*tp_call*/
2774 0, /*tp_str*/
2775 0, /*tp_getattro*/
2776 0, /*tp_setattro*/
2777 0, /*tp_as_buffer*/
2778 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
2779 Pickler_doc, /*tp_doc*/
2780 (traverseproc)Pickler_traverse, /*tp_traverse*/
2781 (inquiry)Pickler_clear, /*tp_clear*/
2782 0, /*tp_richcompare*/
2783 0, /*tp_weaklistoffset*/
2784 0, /*tp_iter*/
2785 0, /*tp_iternext*/
2786 Pickler_methods, /*tp_methods*/
2787 Pickler_members, /*tp_members*/
2788 Pickler_getsets, /*tp_getset*/
2789 0, /*tp_base*/
2790 0, /*tp_dict*/
2791 0, /*tp_descr_get*/
2792 0, /*tp_descr_set*/
2793 0, /*tp_dictoffset*/
2794 (initproc)Pickler_init, /*tp_init*/
2795 PyType_GenericAlloc, /*tp_alloc*/
2796 PyType_GenericNew, /*tp_new*/
2797 PyObject_GC_Del, /*tp_free*/
2798 0, /*tp_is_gc*/
2799};
2800
2801/* Temporary helper for calling self.find_class().
2802
2803 XXX: It would be nice to able to avoid Python function call overhead, by
2804 using directly the C version of find_class(), when find_class() is not
2805 overridden by a subclass. Although, this could become rather hackish. A
2806 simpler optimization would be to call the C function when self is not a
2807 subclass instance. */
2808static PyObject *
2809find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
2810{
2811 return PyObject_CallMethod((PyObject *)self, "find_class", "OO",
2812 module_name, global_name);
2813}
2814
2815static int
2816marker(UnpicklerObject *self)
2817{
2818 if (self->num_marks < 1) {
2819 PyErr_SetString(UnpicklingError, "could not find MARK");
2820 return -1;
2821 }
2822
2823 return self->marks[--self->num_marks];
2824}
2825
2826static int
2827load_none(UnpicklerObject *self)
2828{
2829 PDATA_APPEND(self->stack, Py_None, -1);
2830 return 0;
2831}
2832
2833static int
2834bad_readline(void)
2835{
2836 PyErr_SetString(UnpicklingError, "pickle data was truncated");
2837 return -1;
2838}
2839
2840static int
2841load_int(UnpicklerObject *self)
2842{
2843 PyObject *value;
2844 char *endptr, *s;
2845 Py_ssize_t len;
2846 long x;
2847
2848 if ((len = unpickler_readline(self, &s)) < 0)
2849 return -1;
2850 if (len < 2)
2851 return bad_readline();
2852
2853 errno = 0;
2854 /* XXX: Should the base argument of strtol() be explicitly set to 10? */
2855 x = strtol(s, &endptr, 0);
2856
2857 if (errno || (*endptr != '\n') || (endptr[1] != '\0')) {
2858 /* Hm, maybe we've got something long. Let's try reading
2859 * it as a Python long object. */
2860 errno = 0;
2861 /* XXX: Same thing about the base here. */
2862 value = PyLong_FromString(s, NULL, 0);
2863 if (value == NULL) {
2864 PyErr_SetString(PyExc_ValueError,
2865 "could not convert string to int");
2866 return -1;
2867 }
2868 }
2869 else {
2870 if (len == 3 && (x == 0 || x == 1)) {
2871 if ((value = PyBool_FromLong(x)) == NULL)
2872 return -1;
2873 }
2874 else {
2875 if ((value = PyLong_FromLong(x)) == NULL)
2876 return -1;
2877 }
2878 }
2879
2880 PDATA_PUSH(self->stack, value, -1);
2881 return 0;
2882}
2883
2884static int
2885load_bool(UnpicklerObject *self, PyObject *boolean)
2886{
2887 assert(boolean == Py_True || boolean == Py_False);
2888 PDATA_APPEND(self->stack, boolean, -1);
2889 return 0;
2890}
2891
/* bytes holds `size` bytes of a little-endian integer.  Return its value as
 * a C long.  Obscure: when size is 1 or 2 the value is unsigned, but when
 * size is 4 it is a signed (two's-complement) 32-bit value.  This is a
 * historical source of cross-platform bugs.
 *
 * The bytes are accumulated in an unsigned long so that shifting the top
 * byte into bit 31 is well defined even where long is only 32 bits wide.
 */
static long
calc_binint(char *bytes, int size)
{
    unsigned char *s = (unsigned char *)bytes;
    unsigned long ux = 0;
    int i;

    for (i = 0; i < size; i++) {
        ux |= (unsigned long)s[i] << (i * 8);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4) is
     * signed.  When bit 31 is set, compute the negative value arithmetically
     * instead of OR-ing in sign bits; this gives the same result whatever
     * the width of long, without relying on signed overflow.
     */
    if (size == 4 && (ux & 0x80000000UL)) {
        return -(long)(~ux & 0x7fffffffUL) - 1;
    }

    return (long)ux;
}
2918
2919static int
2920load_binintx(UnpicklerObject *self, char *s, int size)
2921{
2922 PyObject *value;
2923 long x;
2924
2925 x = calc_binint(s, size);
2926
2927 if ((value = PyLong_FromLong(x)) == NULL)
2928 return -1;
2929
2930 PDATA_PUSH(self->stack, value, -1);
2931 return 0;
2932}
2933
2934static int
2935load_binint(UnpicklerObject *self)
2936{
2937 char *s;
2938
2939 if (unpickler_read(self, &s, 4) < 0)
2940 return -1;
2941
2942 return load_binintx(self, s, 4);
2943}
2944
2945static int
2946load_binint1(UnpicklerObject *self)
2947{
2948 char *s;
2949
2950 if (unpickler_read(self, &s, 1) < 0)
2951 return -1;
2952
2953 return load_binintx(self, s, 1);
2954}
2955
2956static int
2957load_binint2(UnpicklerObject *self)
2958{
2959 char *s;
2960
2961 if (unpickler_read(self, &s, 2) < 0)
2962 return -1;
2963
2964 return load_binintx(self, s, 2);
2965}
2966
2967static int
2968load_long(UnpicklerObject *self)
2969{
2970 PyObject *value;
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00002971 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002972 Py_ssize_t len;
2973
2974 if ((len = unpickler_readline(self, &s)) < 0)
2975 return -1;
2976 if (len < 2)
2977 return bad_readline();
2978
Mark Dickinson8dd05142009-01-20 20:43:58 +00002979 /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
2980 the 'L' before calling PyLong_FromString. In order to maintain
2981 compatibility with Python 3.0.0, we don't actually *require*
2982 the 'L' to be present. */
2983 if (s[len-2] == 'L') {
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00002984 s[len-2] = '\0';
Mark Dickinson8dd05142009-01-20 20:43:58 +00002985 }
Alexandre Vassalottie4bccb72009-01-24 01:47:57 +00002986 /* XXX: Should the base argument explicitly set to 10? */
2987 value = PyLong_FromString(s, NULL, 0);
Mark Dickinson8dd05142009-01-20 20:43:58 +00002988 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002989 return -1;
2990
2991 PDATA_PUSH(self->stack, value, -1);
2992 return 0;
2993}
2994
2995/* 'size' bytes contain the # of bytes of little-endian 256's-complement
2996 * data following.
2997 */
2998static int
2999load_counted_long(UnpicklerObject *self, int size)
3000{
3001 PyObject *value;
3002 char *nbytes;
3003 char *pdata;
3004
3005 assert(size == 1 || size == 4);
3006 if (unpickler_read(self, &nbytes, size) < 0)
3007 return -1;
3008
3009 size = calc_binint(nbytes, size);
3010 if (size < 0) {
3011 /* Corrupt or hostile pickle -- we never write one like this */
3012 PyErr_SetString(UnpicklingError,
3013 "LONG pickle has negative byte count");
3014 return -1;
3015 }
3016
3017 if (size == 0)
3018 value = PyLong_FromLong(0L);
3019 else {
3020 /* Read the raw little-endian bytes and convert. */
3021 if (unpickler_read(self, &pdata, size) < 0)
3022 return -1;
3023 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
3024 1 /* little endian */ , 1 /* signed */ );
3025 }
3026 if (value == NULL)
3027 return -1;
3028 PDATA_PUSH(self->stack, value, -1);
3029 return 0;
3030}
3031
3032static int
3033load_float(UnpicklerObject *self)
3034{
3035 PyObject *value;
3036 char *endptr, *s;
3037 Py_ssize_t len;
3038 double d;
3039
3040 if ((len = unpickler_readline(self, &s)) < 0)
3041 return -1;
3042 if (len < 2)
3043 return bad_readline();
3044
3045 errno = 0;
Mark Dickinson725bfd82009-05-03 20:33:40 +00003046 d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
3047 if (d == -1.0 && PyErr_Occurred())
3048 return -1;
3049 if ((endptr[0] != '\n') || (endptr[1] != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003050 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
3051 return -1;
3052 }
Mark Dickinson725bfd82009-05-03 20:33:40 +00003053 value = PyFloat_FromDouble(d);
3054 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003055 return -1;
3056
3057 PDATA_PUSH(self->stack, value, -1);
3058 return 0;
Mark Dickinson725bfd82009-05-03 20:33:40 +00003059 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003060
3061static int
3062load_binfloat(UnpicklerObject *self)
3063{
3064 PyObject *value;
3065 double x;
3066 char *s;
3067
3068 if (unpickler_read(self, &s, 8) < 0)
3069 return -1;
3070
3071 x = _PyFloat_Unpack8((unsigned char *)s, 0);
3072 if (x == -1.0 && PyErr_Occurred())
3073 return -1;
3074
3075 if ((value = PyFloat_FromDouble(x)) == NULL)
3076 return -1;
3077
3078 PDATA_PUSH(self->stack, value, -1);
3079 return 0;
3080}
3081
3082static int
3083load_string(UnpicklerObject *self)
3084{
3085 PyObject *bytes;
3086 PyObject *str = NULL;
3087 Py_ssize_t len;
3088 char *s, *p;
3089
3090 if ((len = unpickler_readline(self, &s)) < 0)
3091 return -1;
3092 if (len < 3)
3093 return bad_readline();
3094 if ((s = strdup(s)) == NULL) {
3095 PyErr_NoMemory();
3096 return -1;
3097 }
3098
3099 /* Strip outermost quotes */
3100 while (s[len - 1] <= ' ')
3101 len--;
3102 if (s[0] == '"' && s[len - 1] == '"') {
3103 s[len - 1] = '\0';
3104 p = s + 1;
3105 len -= 2;
3106 }
3107 else if (s[0] == '\'' && s[len - 1] == '\'') {
3108 s[len - 1] = '\0';
3109 p = s + 1;
3110 len -= 2;
3111 }
3112 else {
3113 free(s);
3114 PyErr_SetString(PyExc_ValueError, "insecure string pickle");
3115 return -1;
3116 }
3117
3118 /* Use the PyBytes API to decode the string, since that is what is used
3119 to encode, and then coerce the result to Unicode. */
3120 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
3121 free(s);
3122 if (bytes == NULL)
3123 return -1;
3124 str = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
3125 Py_DECREF(bytes);
3126 if (str == NULL)
3127 return -1;
3128
3129 PDATA_PUSH(self->stack, str, -1);
3130 return 0;
3131}
3132
3133static int
3134load_binbytes(UnpicklerObject *self)
3135{
3136 PyObject *bytes;
3137 long x;
3138 char *s;
3139
3140 if (unpickler_read(self, &s, 4) < 0)
3141 return -1;
3142
3143 x = calc_binint(s, 4);
3144 if (x < 0) {
3145 PyErr_SetString(UnpicklingError,
3146 "BINBYTES pickle has negative byte count");
3147 return -1;
3148 }
3149
3150 if (unpickler_read(self, &s, x) < 0)
3151 return -1;
3152 bytes = PyBytes_FromStringAndSize(s, x);
3153 if (bytes == NULL)
3154 return -1;
3155
3156 PDATA_PUSH(self->stack, bytes, -1);
3157 return 0;
3158}
3159
3160static int
3161load_short_binbytes(UnpicklerObject *self)
3162{
3163 PyObject *bytes;
3164 unsigned char x;
3165 char *s;
3166
3167 if (unpickler_read(self, &s, 1) < 0)
3168 return -1;
3169
3170 x = (unsigned char)s[0];
3171
3172 if (unpickler_read(self, &s, x) < 0)
3173 return -1;
3174
3175 bytes = PyBytes_FromStringAndSize(s, x);
3176 if (bytes == NULL)
3177 return -1;
3178
3179 PDATA_PUSH(self->stack, bytes, -1);
3180 return 0;
3181}
3182
3183static int
3184load_binstring(UnpicklerObject *self)
3185{
3186 PyObject *str;
3187 long x;
3188 char *s;
3189
3190 if (unpickler_read(self, &s, 4) < 0)
3191 return -1;
3192
3193 x = calc_binint(s, 4);
3194 if (x < 0) {
3195 PyErr_SetString(UnpicklingError,
3196 "BINSTRING pickle has negative byte count");
3197 return -1;
3198 }
3199
3200 if (unpickler_read(self, &s, x) < 0)
3201 return -1;
3202
3203 /* Convert Python 2.x strings to unicode. */
3204 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
3205 if (str == NULL)
3206 return -1;
3207
3208 PDATA_PUSH(self->stack, str, -1);
3209 return 0;
3210}
3211
3212static int
3213load_short_binstring(UnpicklerObject *self)
3214{
3215 PyObject *str;
3216 unsigned char x;
3217 char *s;
3218
3219 if (unpickler_read(self, &s, 1) < 0)
3220 return -1;
3221
3222 x = (unsigned char)s[0];
3223
3224 if (unpickler_read(self, &s, x) < 0)
3225 return -1;
3226
3227 /* Convert Python 2.x strings to unicode. */
3228 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
3229 if (str == NULL)
3230 return -1;
3231
3232 PDATA_PUSH(self->stack, str, -1);
3233 return 0;
3234}
3235
3236static int
3237load_unicode(UnpicklerObject *self)
3238{
3239 PyObject *str;
3240 Py_ssize_t len;
3241 char *s;
3242
3243 if ((len = unpickler_readline(self, &s)) < 0)
3244 return -1;
3245 if (len < 1)
3246 return bad_readline();
3247
3248 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
3249 if (str == NULL)
3250 return -1;
3251
3252 PDATA_PUSH(self->stack, str, -1);
3253 return 0;
3254}
3255
3256static int
3257load_binunicode(UnpicklerObject *self)
3258{
3259 PyObject *str;
3260 long size;
3261 char *s;
3262
3263 if (unpickler_read(self, &s, 4) < 0)
3264 return -1;
3265
3266 size = calc_binint(s, 4);
3267 if (size < 0) {
3268 PyErr_SetString(UnpicklingError,
3269 "BINUNICODE pickle has negative byte count");
3270 return -1;
3271 }
3272
3273 if (unpickler_read(self, &s, size) < 0)
3274 return -1;
3275
3276 str = PyUnicode_DecodeUTF8(s, size, NULL);
3277 if (str == NULL)
3278 return -1;
3279
3280 PDATA_PUSH(self->stack, str, -1);
3281 return 0;
3282}
3283
3284static int
3285load_tuple(UnpicklerObject *self)
3286{
3287 PyObject *tuple;
3288 int i;
3289
3290 if ((i = marker(self)) < 0)
3291 return -1;
3292
3293 tuple = Pdata_poptuple(self->stack, i);
3294 if (tuple == NULL)
3295 return -1;
3296 PDATA_PUSH(self->stack, tuple, -1);
3297 return 0;
3298}
3299
3300static int
3301load_counted_tuple(UnpicklerObject *self, int len)
3302{
3303 PyObject *tuple;
3304
3305 tuple = PyTuple_New(len);
3306 if (tuple == NULL)
3307 return -1;
3308
3309 while (--len >= 0) {
3310 PyObject *item;
3311
3312 PDATA_POP(self->stack, item);
3313 if (item == NULL)
3314 return -1;
3315 PyTuple_SET_ITEM(tuple, len, item);
3316 }
3317 PDATA_PUSH(self->stack, tuple, -1);
3318 return 0;
3319}
3320
3321static int
3322load_empty_list(UnpicklerObject *self)
3323{
3324 PyObject *list;
3325
3326 if ((list = PyList_New(0)) == NULL)
3327 return -1;
3328 PDATA_PUSH(self->stack, list, -1);
3329 return 0;
3330}
3331
3332static int
3333load_empty_dict(UnpicklerObject *self)
3334{
3335 PyObject *dict;
3336
3337 if ((dict = PyDict_New()) == NULL)
3338 return -1;
3339 PDATA_PUSH(self->stack, dict, -1);
3340 return 0;
3341}
3342
3343static int
3344load_list(UnpicklerObject *self)
3345{
3346 PyObject *list;
3347 int i;
3348
3349 if ((i = marker(self)) < 0)
3350 return -1;
3351
3352 list = Pdata_poplist(self->stack, i);
3353 if (list == NULL)
3354 return -1;
3355 PDATA_PUSH(self->stack, list, -1);
3356 return 0;
3357}
3358
3359static int
3360load_dict(UnpicklerObject *self)
3361{
3362 PyObject *dict, *key, *value;
3363 int i, j, k;
3364
3365 if ((i = marker(self)) < 0)
3366 return -1;
3367 j = self->stack->length;
3368
3369 if ((dict = PyDict_New()) == NULL)
3370 return -1;
3371
3372 for (k = i + 1; k < j; k += 2) {
3373 key = self->stack->data[k - 1];
3374 value = self->stack->data[k];
3375 if (PyDict_SetItem(dict, key, value) < 0) {
3376 Py_DECREF(dict);
3377 return -1;
3378 }
3379 }
3380 Pdata_clear(self->stack, i);
3381 PDATA_PUSH(self->stack, dict, -1);
3382 return 0;
3383}
3384
3385static PyObject *
3386instantiate(PyObject *cls, PyObject *args)
3387{
3388 PyObject *r = NULL;
3389
3390 /* XXX: The pickle.py module does not create instances this way when the
3391 args tuple is empty. See Unpickler._instantiate(). */
3392 if ((r = PyObject_CallObject(cls, args)))
3393 return r;
3394
3395 /* XXX: Is this still nescessary? */
3396 {
3397 PyObject *tp, *v, *tb, *tmp_value;
3398
3399 PyErr_Fetch(&tp, &v, &tb);
3400 tmp_value = v;
3401 /* NULL occurs when there was a KeyboardInterrupt */
3402 if (tmp_value == NULL)
3403 tmp_value = Py_None;
3404 if ((r = PyTuple_Pack(3, tmp_value, cls, args))) {
3405 Py_XDECREF(v);
3406 v = r;
3407 }
3408 PyErr_Restore(tp, v, tb);
3409 }
3410 return NULL;
3411}
3412
3413static int
3414load_obj(UnpicklerObject *self)
3415{
3416 PyObject *cls, *args, *obj = NULL;
3417 int i;
3418
3419 if ((i = marker(self)) < 0)
3420 return -1;
3421
3422 args = Pdata_poptuple(self->stack, i + 1);
3423 if (args == NULL)
3424 return -1;
3425
3426 PDATA_POP(self->stack, cls);
3427 if (cls) {
3428 obj = instantiate(cls, args);
3429 Py_DECREF(cls);
3430 }
3431 Py_DECREF(args);
3432 if (obj == NULL)
3433 return -1;
3434
3435 PDATA_PUSH(self->stack, obj, -1);
3436 return 0;
3437}
3438
3439static int
3440load_inst(UnpicklerObject *self)
3441{
3442 PyObject *cls = NULL;
3443 PyObject *args = NULL;
3444 PyObject *obj = NULL;
3445 PyObject *module_name;
3446 PyObject *class_name;
3447 Py_ssize_t len;
3448 int i;
3449 char *s;
3450
3451 if ((i = marker(self)) < 0)
3452 return -1;
3453 if ((len = unpickler_readline(self, &s)) < 0)
3454 return -1;
3455 if (len < 2)
3456 return bad_readline();
3457
3458 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
3459 identifiers are permitted in Python 3.0, since the INST opcode is only
3460 supported by older protocols on Python 2.x. */
3461 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
3462 if (module_name == NULL)
3463 return -1;
3464
3465 if ((len = unpickler_readline(self, &s)) >= 0) {
3466 if (len < 2)
3467 return bad_readline();
3468 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
3469 if (class_name == NULL) {
3470 cls = find_class(self, module_name, class_name);
3471 Py_DECREF(class_name);
3472 }
3473 }
3474 Py_DECREF(module_name);
3475
3476 if (cls == NULL)
3477 return -1;
3478
3479 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
3480 obj = instantiate(cls, args);
3481 Py_DECREF(args);
3482 }
3483 Py_DECREF(cls);
3484
3485 if (obj == NULL)
3486 return -1;
3487
3488 PDATA_PUSH(self->stack, obj, -1);
3489 return 0;
3490}
3491
3492static int
3493load_newobj(UnpicklerObject *self)
3494{
3495 PyObject *args = NULL;
3496 PyObject *clsraw = NULL;
3497 PyTypeObject *cls; /* clsraw cast to its true type */
3498 PyObject *obj;
3499
3500 /* Stack is ... cls argtuple, and we want to call
3501 * cls.__new__(cls, *argtuple).
3502 */
3503 PDATA_POP(self->stack, args);
3504 if (args == NULL)
3505 goto error;
3506 if (!PyTuple_Check(args)) {
3507 PyErr_SetString(UnpicklingError, "NEWOBJ expected an arg " "tuple.");
3508 goto error;
3509 }
3510
3511 PDATA_POP(self->stack, clsraw);
3512 cls = (PyTypeObject *)clsraw;
3513 if (cls == NULL)
3514 goto error;
3515 if (!PyType_Check(cls)) {
3516 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
3517 "isn't a type object");
3518 goto error;
3519 }
3520 if (cls->tp_new == NULL) {
3521 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
3522 "has NULL tp_new");
3523 goto error;
3524 }
3525
3526 /* Call __new__. */
3527 obj = cls->tp_new(cls, args, NULL);
3528 if (obj == NULL)
3529 goto error;
3530
3531 Py_DECREF(args);
3532 Py_DECREF(clsraw);
3533 PDATA_PUSH(self->stack, obj, -1);
3534 return 0;
3535
3536 error:
3537 Py_XDECREF(args);
3538 Py_XDECREF(clsraw);
3539 return -1;
3540}
3541
3542static int
3543load_global(UnpicklerObject *self)
3544{
3545 PyObject *global = NULL;
3546 PyObject *module_name;
3547 PyObject *global_name;
3548 Py_ssize_t len;
3549 char *s;
3550
3551 if ((len = unpickler_readline(self, &s)) < 0)
3552 return -1;
3553 if (len < 2)
3554 return bad_readline();
3555 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
3556 if (!module_name)
3557 return -1;
3558
3559 if ((len = unpickler_readline(self, &s)) >= 0) {
3560 if (len < 2) {
3561 Py_DECREF(module_name);
3562 return bad_readline();
3563 }
3564 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
3565 if (global_name) {
3566 global = find_class(self, module_name, global_name);
3567 Py_DECREF(global_name);
3568 }
3569 }
3570 Py_DECREF(module_name);
3571
3572 if (global == NULL)
3573 return -1;
3574 PDATA_PUSH(self->stack, global, -1);
3575 return 0;
3576}
3577
3578static int
3579load_persid(UnpicklerObject *self)
3580{
3581 PyObject *pid;
3582 Py_ssize_t len;
3583 char *s;
3584
3585 if (self->pers_func) {
3586 if ((len = unpickler_readline(self, &s)) < 0)
3587 return -1;
3588 if (len < 2)
3589 return bad_readline();
3590
3591 pid = PyBytes_FromStringAndSize(s, len - 1);
3592 if (pid == NULL)
3593 return -1;
3594
3595 /* Ugh... this does not leak since unpickler_call() steals the
3596 reference to pid first. */
3597 pid = unpickler_call(self, self->pers_func, pid);
3598 if (pid == NULL)
3599 return -1;
3600
3601 PDATA_PUSH(self->stack, pid, -1);
3602 return 0;
3603 }
3604 else {
3605 PyErr_SetString(UnpicklingError,
3606 "A load persistent id instruction was encountered,\n"
3607 "but no persistent_load function was specified.");
3608 return -1;
3609 }
3610}
3611
3612static int
3613load_binpersid(UnpicklerObject *self)
3614{
3615 PyObject *pid;
3616
3617 if (self->pers_func) {
3618 PDATA_POP(self->stack, pid);
3619 if (pid == NULL)
3620 return -1;
3621
3622 /* Ugh... this does not leak since unpickler_call() steals the
3623 reference to pid first. */
3624 pid = unpickler_call(self, self->pers_func, pid);
3625 if (pid == NULL)
3626 return -1;
3627
3628 PDATA_PUSH(self->stack, pid, -1);
3629 return 0;
3630 }
3631 else {
3632 PyErr_SetString(UnpicklingError,
3633 "A load persistent id instruction was encountered,\n"
3634 "but no persistent_load function was specified.");
3635 return -1;
3636 }
3637}
3638
3639static int
3640load_pop(UnpicklerObject *self)
3641{
Collin Winter8ca69de2009-05-26 16:53:41 +00003642 int len = self->stack->length;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003643
3644 /* Note that we split the (pickle.py) stack into two stacks,
3645 * an object stack and a mark stack. We have to be clever and
3646 * pop the right one. We do this by looking at the top of the
Collin Winter8ca69de2009-05-26 16:53:41 +00003647 * mark stack first, and only signalling a stack underflow if
3648 * the object stack is empty and the mark stack doesn't match
3649 * our expectations.
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003650 */
Collin Winter8ca69de2009-05-26 16:53:41 +00003651 if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003652 self->num_marks--;
Collin Winter8ca69de2009-05-26 16:53:41 +00003653 } else if (len >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003654 len--;
3655 Py_DECREF(self->stack->data[len]);
3656 self->stack->length = len;
Collin Winter8ca69de2009-05-26 16:53:41 +00003657 } else {
3658 return stack_underflow();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003659 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003660 return 0;
3661}
3662
3663static int
3664load_pop_mark(UnpicklerObject *self)
3665{
3666 int i;
3667
3668 if ((i = marker(self)) < 0)
3669 return -1;
3670
3671 Pdata_clear(self->stack, i);
3672
3673 return 0;
3674}
3675
3676static int
3677load_dup(UnpicklerObject *self)
3678{
3679 PyObject *last;
3680 int len;
3681
3682 if ((len = self->stack->length) <= 0)
3683 return stack_underflow();
3684 last = self->stack->data[len - 1];
3685 PDATA_APPEND(self->stack, last, -1);
3686 return 0;
3687}
3688
3689static int
3690load_get(UnpicklerObject *self)
3691{
3692 PyObject *key, *value;
3693 Py_ssize_t len;
3694 char *s;
3695
3696 if ((len = unpickler_readline(self, &s)) < 0)
3697 return -1;
3698 if (len < 2)
3699 return bad_readline();
3700
3701 key = PyLong_FromString(s, NULL, 10);
3702 if (key == NULL)
3703 return -1;
3704
3705 value = PyDict_GetItemWithError(self->memo, key);
3706 if (value == NULL) {
3707 if (!PyErr_Occurred())
3708 PyErr_SetObject(PyExc_KeyError, key);
3709 Py_DECREF(key);
3710 return -1;
3711 }
3712 Py_DECREF(key);
3713
3714 PDATA_APPEND(self->stack, value, -1);
3715 return 0;
3716}
3717
3718static int
3719load_binget(UnpicklerObject *self)
3720{
3721 PyObject *key, *value;
3722 char *s;
3723
3724 if (unpickler_read(self, &s, 1) < 0)
3725 return -1;
3726
3727 /* Here, the unsigned cast is necessary to avoid negative values. */
3728 key = PyLong_FromLong((long)(unsigned char)s[0]);
3729 if (key == NULL)
3730 return -1;
3731
3732 value = PyDict_GetItemWithError(self->memo, key);
3733 if (value == NULL) {
3734 if (!PyErr_Occurred())
3735 PyErr_SetObject(PyExc_KeyError, key);
3736 Py_DECREF(key);
3737 return -1;
3738 }
3739 Py_DECREF(key);
3740
3741 PDATA_APPEND(self->stack, value, -1);
3742 return 0;
3743}
3744
3745static int
3746load_long_binget(UnpicklerObject *self)
3747{
3748 PyObject *key, *value;
3749 char *s;
3750 long k;
3751
3752 if (unpickler_read(self, &s, 4) < 0)
3753 return -1;
3754
3755 k = (long)(unsigned char)s[0];
3756 k |= (long)(unsigned char)s[1] << 8;
3757 k |= (long)(unsigned char)s[2] << 16;
3758 k |= (long)(unsigned char)s[3] << 24;
3759
3760 key = PyLong_FromLong(k);
3761 if (key == NULL)
3762 return -1;
3763
3764 value = PyDict_GetItemWithError(self->memo, key);
3765 if (value == NULL) {
3766 if (!PyErr_Occurred())
3767 PyErr_SetObject(PyExc_KeyError, key);
3768 Py_DECREF(key);
3769 return -1;
3770 }
3771 Py_DECREF(key);
3772
3773 PDATA_APPEND(self->stack, value, -1);
3774 return 0;
3775}
3776
3777/* Push an object from the extension registry (EXT[124]). nbytes is
3778 * the number of bytes following the opcode, holding the index (code) value.
3779 */
3780static int
3781load_extension(UnpicklerObject *self, int nbytes)
3782{
3783 char *codebytes; /* the nbytes bytes after the opcode */
3784 long code; /* calc_binint returns long */
3785 PyObject *py_code; /* code as a Python int */
3786 PyObject *obj; /* the object to push */
3787 PyObject *pair; /* (module_name, class_name) */
3788 PyObject *module_name, *class_name;
3789
3790 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
3791 if (unpickler_read(self, &codebytes, nbytes) < 0)
3792 return -1;
3793 code = calc_binint(codebytes, nbytes);
3794 if (code <= 0) { /* note that 0 is forbidden */
3795 /* Corrupt or hostile pickle. */
3796 PyErr_SetString(UnpicklingError, "EXT specifies code <= 0");
3797 return -1;
3798 }
3799
3800 /* Look for the code in the cache. */
3801 py_code = PyLong_FromLong(code);
3802 if (py_code == NULL)
3803 return -1;
3804 obj = PyDict_GetItem(extension_cache, py_code);
3805 if (obj != NULL) {
3806 /* Bingo. */
3807 Py_DECREF(py_code);
3808 PDATA_APPEND(self->stack, obj, -1);
3809 return 0;
3810 }
3811
3812 /* Look up the (module_name, class_name) pair. */
3813 pair = PyDict_GetItem(inverted_registry, py_code);
3814 if (pair == NULL) {
3815 Py_DECREF(py_code);
3816 PyErr_Format(PyExc_ValueError, "unregistered extension "
3817 "code %ld", code);
3818 return -1;
3819 }
3820 /* Since the extension registry is manipulable via Python code,
3821 * confirm that pair is really a 2-tuple of strings.
3822 */
3823 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2 ||
3824 !PyUnicode_Check(module_name = PyTuple_GET_ITEM(pair, 0)) ||
3825 !PyUnicode_Check(class_name = PyTuple_GET_ITEM(pair, 1))) {
3826 Py_DECREF(py_code);
3827 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
3828 "isn't a 2-tuple of strings", code);
3829 return -1;
3830 }
3831 /* Load the object. */
3832 obj = find_class(self, module_name, class_name);
3833 if (obj == NULL) {
3834 Py_DECREF(py_code);
3835 return -1;
3836 }
3837 /* Cache code -> obj. */
3838 code = PyDict_SetItem(extension_cache, py_code, obj);
3839 Py_DECREF(py_code);
3840 if (code < 0) {
3841 Py_DECREF(obj);
3842 return -1;
3843 }
3844 PDATA_PUSH(self->stack, obj, -1);
3845 return 0;
3846}
3847
3848static int
3849load_put(UnpicklerObject *self)
3850{
3851 PyObject *key, *value;
3852 Py_ssize_t len;
3853 char *s;
3854 int x;
3855
3856 if ((len = unpickler_readline(self, &s)) < 0)
3857 return -1;
3858 if (len < 2)
3859 return bad_readline();
3860 if ((x = self->stack->length) <= 0)
3861 return stack_underflow();
3862
3863 key = PyLong_FromString(s, NULL, 10);
3864 if (key == NULL)
3865 return -1;
3866 value = self->stack->data[x - 1];
3867
3868 x = PyDict_SetItem(self->memo, key, value);
3869 Py_DECREF(key);
3870 return x;
3871}
3872
3873static int
3874load_binput(UnpicklerObject *self)
3875{
3876 PyObject *key, *value;
3877 char *s;
3878 int x;
3879
3880 if (unpickler_read(self, &s, 1) < 0)
3881 return -1;
3882 if ((x = self->stack->length) <= 0)
3883 return stack_underflow();
3884
3885 key = PyLong_FromLong((long)(unsigned char)s[0]);
3886 if (key == NULL)
3887 return -1;
3888 value = self->stack->data[x - 1];
3889
3890 x = PyDict_SetItem(self->memo, key, value);
3891 Py_DECREF(key);
3892 return x;
3893}
3894
3895static int
3896load_long_binput(UnpicklerObject *self)
3897{
3898 PyObject *key, *value;
3899 long k;
3900 char *s;
3901 int x;
3902
3903 if (unpickler_read(self, &s, 4) < 0)
3904 return -1;
3905 if ((x = self->stack->length) <= 0)
3906 return stack_underflow();
3907
3908 k = (long)(unsigned char)s[0];
3909 k |= (long)(unsigned char)s[1] << 8;
3910 k |= (long)(unsigned char)s[2] << 16;
3911 k |= (long)(unsigned char)s[3] << 24;
3912
3913 key = PyLong_FromLong(k);
3914 if (key == NULL)
3915 return -1;
3916 value = self->stack->data[x - 1];
3917
3918 x = PyDict_SetItem(self->memo, key, value);
3919 Py_DECREF(key);
3920 return x;
3921}
3922
3923static int
3924do_append(UnpicklerObject *self, int x)
3925{
3926 PyObject *value;
3927 PyObject *list;
3928 int len, i;
3929
3930 len = self->stack->length;
3931 if (x > len || x <= 0)
3932 return stack_underflow();
3933 if (len == x) /* nothing to do */
3934 return 0;
3935
3936 list = self->stack->data[x - 1];
3937
3938 if (PyList_Check(list)) {
3939 PyObject *slice;
3940 Py_ssize_t list_len;
3941
3942 slice = Pdata_poplist(self->stack, x);
3943 if (!slice)
3944 return -1;
3945 list_len = PyList_GET_SIZE(list);
3946 i = PyList_SetSlice(list, list_len, list_len, slice);
3947 Py_DECREF(slice);
3948 return i;
3949 }
3950 else {
3951 PyObject *append_func;
3952
3953 append_func = PyObject_GetAttrString(list, "append");
3954 if (append_func == NULL)
3955 return -1;
3956 for (i = x; i < len; i++) {
3957 PyObject *result;
3958
3959 value = self->stack->data[i];
3960 result = unpickler_call(self, append_func, value);
3961 if (result == NULL) {
3962 Pdata_clear(self->stack, i + 1);
3963 self->stack->length = x;
3964 return -1;
3965 }
3966 Py_DECREF(result);
3967 }
3968 self->stack->length = x;
3969 }
3970
3971 return 0;
3972}
3973
3974static int
3975load_append(UnpicklerObject *self)
3976{
3977 return do_append(self, self->stack->length - 1);
3978}
3979
3980static int
3981load_appends(UnpicklerObject *self)
3982{
3983 return do_append(self, marker(self));
3984}
3985
3986static int
3987do_setitems(UnpicklerObject *self, int x)
3988{
3989 PyObject *value, *key;
3990 PyObject *dict;
3991 int len, i;
3992 int status = 0;
3993
3994 len = self->stack->length;
3995 if (x > len || x <= 0)
3996 return stack_underflow();
3997 if (len == x) /* nothing to do */
3998 return 0;
3999 if ((len - x) % 2 != 0) {
4000 /* Currupt or hostile pickle -- we never write one like this. */
4001 PyErr_SetString(UnpicklingError, "odd number of items for SETITEMS");
4002 return -1;
4003 }
4004
4005 /* Here, dict does not actually need to be a PyDict; it could be anything
4006 that supports the __setitem__ attribute. */
4007 dict = self->stack->data[x - 1];
4008
4009 for (i = x + 1; i < len; i += 2) {
4010 key = self->stack->data[i - 1];
4011 value = self->stack->data[i];
4012 if (PyObject_SetItem(dict, key, value) < 0) {
4013 status = -1;
4014 break;
4015 }
4016 }
4017
4018 Pdata_clear(self->stack, x);
4019 return status;
4020}
4021
4022static int
4023load_setitem(UnpicklerObject *self)
4024{
4025 return do_setitems(self, self->stack->length - 2);
4026}
4027
4028static int
4029load_setitems(UnpicklerObject *self)
4030{
4031 return do_setitems(self, marker(self));
4032}
4033
4034static int
4035load_build(UnpicklerObject *self)
4036{
4037 PyObject *state, *inst, *slotstate;
4038 PyObject *setstate;
4039 int status = 0;
4040
4041 /* Stack is ... instance, state. We want to leave instance at
4042 * the stack top, possibly mutated via instance.__setstate__(state).
4043 */
4044 if (self->stack->length < 2)
4045 return stack_underflow();
4046
4047 PDATA_POP(self->stack, state);
4048 if (state == NULL)
4049 return -1;
4050
4051 inst = self->stack->data[self->stack->length - 1];
4052
4053 setstate = PyObject_GetAttrString(inst, "__setstate__");
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00004054 if (setstate == NULL) {
4055 if (PyErr_ExceptionMatches(PyExc_AttributeError))
4056 PyErr_Clear();
Antoine Pitroud79dc622008-09-05 00:03:33 +00004057 else {
4058 Py_DECREF(state);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00004059 return -1;
Antoine Pitroud79dc622008-09-05 00:03:33 +00004060 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004061 }
4062 else {
4063 PyObject *result;
4064
4065 /* The explicit __setstate__ is responsible for everything. */
Antoine Pitroud79dc622008-09-05 00:03:33 +00004066 /* Ugh... this does not leak since unpickler_call() steals the
4067 reference to state first. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004068 result = unpickler_call(self, setstate, state);
4069 Py_DECREF(setstate);
4070 if (result == NULL)
4071 return -1;
4072 Py_DECREF(result);
4073 return 0;
4074 }
4075
4076 /* A default __setstate__. First see whether state embeds a
4077 * slot state dict too (a proto 2 addition).
4078 */
4079 if (PyTuple_Check(state) && Py_SIZE(state) == 2) {
4080 PyObject *tmp = state;
4081
4082 state = PyTuple_GET_ITEM(tmp, 0);
4083 slotstate = PyTuple_GET_ITEM(tmp, 1);
4084 Py_INCREF(state);
4085 Py_INCREF(slotstate);
4086 Py_DECREF(tmp);
4087 }
4088 else
4089 slotstate = NULL;
4090
4091 /* Set inst.__dict__ from the state dict (if any). */
4092 if (state != Py_None) {
4093 PyObject *dict;
Antoine Pitroua9f48a02009-05-02 21:41:14 +00004094 PyObject *d_key, *d_value;
4095 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004096
4097 if (!PyDict_Check(state)) {
4098 PyErr_SetString(UnpicklingError, "state is not a dictionary");
4099 goto error;
4100 }
4101 dict = PyObject_GetAttrString(inst, "__dict__");
4102 if (dict == NULL)
4103 goto error;
4104
Antoine Pitroua9f48a02009-05-02 21:41:14 +00004105 i = 0;
4106 while (PyDict_Next(state, &i, &d_key, &d_value)) {
4107 /* normally the keys for instance attributes are
4108 interned. we should try to do that here. */
4109 Py_INCREF(d_key);
4110 if (PyUnicode_CheckExact(d_key))
4111 PyUnicode_InternInPlace(&d_key);
4112 if (PyObject_SetItem(dict, d_key, d_value) < 0) {
4113 Py_DECREF(d_key);
4114 goto error;
4115 }
4116 Py_DECREF(d_key);
4117 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004118 Py_DECREF(dict);
4119 }
4120
4121 /* Also set instance attributes from the slotstate dict (if any). */
4122 if (slotstate != NULL) {
4123 PyObject *d_key, *d_value;
4124 Py_ssize_t i;
4125
4126 if (!PyDict_Check(slotstate)) {
4127 PyErr_SetString(UnpicklingError,
4128 "slot state is not a dictionary");
4129 goto error;
4130 }
4131 i = 0;
4132 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
4133 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
4134 goto error;
4135 }
4136 }
4137
4138 if (0) {
4139 error:
4140 status = -1;
4141 }
4142
4143 Py_DECREF(state);
4144 Py_XDECREF(slotstate);
4145 return status;
4146}
4147
4148static int
4149load_mark(UnpicklerObject *self)
4150{
4151
4152 /* Note that we split the (pickle.py) stack into two stacks, an
4153 * object stack and a mark stack. Here we push a mark onto the
4154 * mark stack.
4155 */
4156
4157 if ((self->num_marks + 1) >= self->marks_size) {
4158 size_t alloc;
4159 int *marks;
4160
4161 /* Use the size_t type to check for overflow. */
4162 alloc = ((size_t)self->num_marks << 1) + 20;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00004163 if (alloc > PY_SSIZE_T_MAX ||
4164 alloc <= ((size_t)self->num_marks + 1)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004165 PyErr_NoMemory();
4166 return -1;
4167 }
4168
4169 if (self->marks == NULL)
4170 marks = (int *)PyMem_Malloc(alloc * sizeof(int));
4171 else
4172 marks = (int *)PyMem_Realloc(self->marks, alloc * sizeof(int));
4173 if (marks == NULL) {
4174 PyErr_NoMemory();
4175 return -1;
4176 }
4177 self->marks = marks;
4178 self->marks_size = (Py_ssize_t)alloc;
4179 }
4180
4181 self->marks[self->num_marks++] = self->stack->length;
4182
4183 return 0;
4184}
4185
4186static int
4187load_reduce(UnpicklerObject *self)
4188{
4189 PyObject *callable = NULL;
4190 PyObject *argtup = NULL;
4191 PyObject *obj = NULL;
4192
4193 PDATA_POP(self->stack, argtup);
4194 if (argtup == NULL)
4195 return -1;
4196 PDATA_POP(self->stack, callable);
4197 if (callable) {
4198 obj = instantiate(callable, argtup);
4199 Py_DECREF(callable);
4200 }
4201 Py_DECREF(argtup);
4202
4203 if (obj == NULL)
4204 return -1;
4205
4206 PDATA_PUSH(self->stack, obj, -1);
4207 return 0;
4208}
4209
4210/* Just raises an error if we don't know the protocol specified. PROTO
4211 * is the first opcode for protocols >= 2.
4212 */
4213static int
4214load_proto(UnpicklerObject *self)
4215{
4216 char *s;
4217 int i;
4218
4219 if (unpickler_read(self, &s, 1) < 0)
4220 return -1;
4221
4222 i = (unsigned char)s[0];
4223 if (i <= HIGHEST_PROTOCOL)
4224 return 0;
4225
4226 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
4227 return -1;
4228}
4229
/* Main unpickling loop.
 *
 * Resets the mark stack and the object stack, then reads one opcode byte at
 * a time and dispatches to the matching load_*() handler until STOP is
 * seen, a read fails, or a handler reports an error.  On success the object
 * left on top of the stack is popped and returned; on failure NULL is
 * returned.
 */
static PyObject *
load(UnpicklerObject *self)
{
    PyObject *err;
    PyObject *value = NULL;
    char *s;

    /* Start from a clean state: no marks, empty object stack. */
    self->num_marks = 0;
    if (self->stack->length)
        Pdata_clear(self->stack, 0);

    /* Convenient macros for the dispatch while-switch loop just below. */
    /* In both macros, a failing handler executes `break`, which leaves the
       switch and then hits the unconditional `break` at the bottom of the
       while body; a succeeding handler executes `continue`, which goes on
       to the next opcode. */
#define OP(opcode, load_func) \
    case opcode: if (load_func(self) < 0) break; continue;

#define OP_ARG(opcode, load_func, arg) \
    case opcode: if (load_func(self, (arg)) < 0) break; continue;

    while (1) {
        if (unpickler_read(self, &s, 1) < 0)
            break;

        switch ((enum opcode)s[0]) {
        OP(NONE, load_none)
        OP(BININT, load_binint)
        OP(BININT1, load_binint1)
        OP(BININT2, load_binint2)
        OP(INT, load_int)
        OP(LONG, load_long)
        OP_ARG(LONG1, load_counted_long, 1)
        OP_ARG(LONG4, load_counted_long, 4)
        OP(FLOAT, load_float)
        OP(BINFLOAT, load_binfloat)
        OP(BINBYTES, load_binbytes)
        OP(SHORT_BINBYTES, load_short_binbytes)
        OP(BINSTRING, load_binstring)
        OP(SHORT_BINSTRING, load_short_binstring)
        OP(STRING, load_string)
        OP(UNICODE, load_unicode)
        OP(BINUNICODE, load_binunicode)
        OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
        OP_ARG(TUPLE1, load_counted_tuple, 1)
        OP_ARG(TUPLE2, load_counted_tuple, 2)
        OP_ARG(TUPLE3, load_counted_tuple, 3)
        OP(TUPLE, load_tuple)
        OP(EMPTY_LIST, load_empty_list)
        OP(LIST, load_list)
        OP(EMPTY_DICT, load_empty_dict)
        OP(DICT, load_dict)
        OP(OBJ, load_obj)
        OP(INST, load_inst)
        OP(NEWOBJ, load_newobj)
        OP(GLOBAL, load_global)
        OP(APPEND, load_append)
        OP(APPENDS, load_appends)
        OP(BUILD, load_build)
        OP(DUP, load_dup)
        OP(BINGET, load_binget)
        OP(LONG_BINGET, load_long_binget)
        OP(GET, load_get)
        OP(MARK, load_mark)
        OP(BINPUT, load_binput)
        OP(LONG_BINPUT, load_long_binput)
        OP(PUT, load_put)
        OP(POP, load_pop)
        OP(POP_MARK, load_pop_mark)
        OP(SETITEM, load_setitem)
        OP(SETITEMS, load_setitems)
        OP(PERSID, load_persid)
        OP(BINPERSID, load_binpersid)
        OP(REDUCE, load_reduce)
        OP(PROTO, load_proto)
        OP_ARG(EXT1, load_extension, 1)
        OP_ARG(EXT2, load_extension, 2)
        OP_ARG(EXT4, load_extension, 4)
        OP_ARG(NEWTRUE, load_bool, Py_True)
        OP_ARG(NEWFALSE, load_bool, Py_False)

        /* STOP ends the pickle: leave the switch and fall into the
           loop-terminating break below. */
        case STOP:
            break;

        /* A NUL opcode byte is reported as end of file. */
        case '\0':
            PyErr_SetNone(PyExc_EOFError);
            return NULL;

        default:
            PyErr_Format(UnpicklingError,
                         "invalid load key, '%c'.", s[0]);
            return NULL;
        }

        break;                  /* and we are done! */
    }

    /* Reached after STOP, after a failed read, or after a failed handler;
       a pending exception distinguishes the failure cases. */
    /* XXX: It is not clear what this is actually for. */
    if ((err = PyErr_Occurred())) {
        if (err == PyExc_EOFError) {
            PyErr_SetNone(PyExc_EOFError);
        }
        return NULL;
    }

    /* The finished object is whatever is on top of the stack. */
    PDATA_POP(self->stack, value);
    return value;
}
4335
4336PyDoc_STRVAR(Unpickler_load_doc,
4337"load() -> object. Load a pickle."
4338"\n"
4339"Read a pickled object representation from the open file object given in\n"
4340"the constructor, and return the reconstituted object hierarchy specified\n"
4341"therein.\n");
4342
4343static PyObject *
4344Unpickler_load(UnpicklerObject *self)
4345{
4346 /* Check whether the Unpickler was initialized correctly. This prevents
4347 segfaulting if a subclass overridden __init__ with a function that does
4348 not call Unpickler.__init__(). Here, we simply ensure that self->read
4349 is not NULL. */
4350 if (self->read == NULL) {
4351 PyErr_Format(UnpicklingError,
4352 "Unpickler.__init__() was not called by %s.__init__()",
4353 Py_TYPE(self)->tp_name);
4354 return NULL;
4355 }
4356
4357 return load(self);
4358}
4359
4360/* The name of find_class() is misleading. In newer pickle protocols, this
4361 function is used for loading any global (i.e., functions), not just
4362 classes. The name is kept only for backward compatibility. */
4363
4364PyDoc_STRVAR(Unpickler_find_class_doc,
4365"find_class(module_name, global_name) -> object.\n"
4366"\n"
4367"Return an object from a specified module, importing the module if\n"
4368"necessary. Subclasses may override this method (e.g. to restrict\n"
4369"unpickling of arbitrary classes and functions).\n"
4370"\n"
4371"This method is called whenever a class or a function object is\n"
4372"needed. Both arguments passed are str objects.\n");
4373
4374static PyObject *
4375Unpickler_find_class(UnpicklerObject *self, PyObject *args)
4376{
4377 PyObject *global;
4378 PyObject *modules_dict;
4379 PyObject *module;
4380 PyObject *module_name, *global_name;
4381
4382 if (!PyArg_UnpackTuple(args, "find_class", 2, 2,
4383 &module_name, &global_name))
4384 return NULL;
4385
4386 modules_dict = PySys_GetObject("modules");
4387 if (modules_dict == NULL)
4388 return NULL;
4389
4390 module = PyDict_GetItem(modules_dict, module_name);
4391 if (module == NULL) {
4392 module = PyImport_Import(module_name);
4393 if (module == NULL)
4394 return NULL;
4395 global = PyObject_GetAttr(module, global_name);
4396 Py_DECREF(module);
4397 }
4398 else {
4399 global = PyObject_GetAttr(module, global_name);
4400 }
4401 return global;
4402}
4403
/* Method table for Unpickler objects: load() takes no arguments,
   find_class() takes positional arguments. */
static struct PyMethodDef Unpickler_methods[] = {
    {"load", (PyCFunction)Unpickler_load, METH_NOARGS,
     Unpickler_load_doc},
    {"find_class", (PyCFunction)Unpickler_find_class, METH_VARARGS,
     Unpickler_find_class_doc},
    {NULL, NULL}                /* sentinel */
};
4411
/* tp_dealloc: release everything an Unpickler owns and free the object. */
static void
Unpickler_dealloc(UnpicklerObject *self)
{
    /* Stop GC tracking before the fields are torn down. */
    PyObject_GC_UnTrack((PyObject *)self);
    /* Any of these may be NULL (e.g. when __init__ never ran or failed
       part-way), hence Py_XDECREF. */
    Py_XDECREF(self->readline);
    Py_XDECREF(self->read);
    Py_XDECREF(self->memo);
    Py_XDECREF(self->stack);
    Py_XDECREF(self->pers_func);
    Py_XDECREF(self->arg);
    Py_XDECREF(self->last_string);

    /* marks is released with PyMem_Free, matching the PyMem_Malloc /
       PyMem_Realloc in load_mark(); encoding and errors come from strdup()
       in Unpickler_init(), so they are released with free(). */
    PyMem_Free(self->marks);
    free(self->encoding);
    free(self->errors);

    Py_TYPE(self)->tp_free((PyObject *)self);
}
4430
/* tp_traverse: report every PyObject reference held by the Unpickler to
   the garbage collector.  Py_VISIT returns early from this function if the
   visit callback reports a non-zero result. */
static int
Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
{
    Py_VISIT(self->readline);
    Py_VISIT(self->read);
    Py_VISIT(self->memo);
    Py_VISIT(self->stack);
    Py_VISIT(self->pers_func);
    Py_VISIT(self->arg);
    Py_VISIT(self->last_string);
    return 0;
}
4443
/* tp_clear: drop every reference and buffer held by the Unpickler and reset
   the corresponding fields to NULL.  Also called by Unpickler_init() when
   __init__() runs a second time.  Always returns 0. */
static int
Unpickler_clear(UnpicklerObject *self)
{
    Py_CLEAR(self->readline);
    Py_CLEAR(self->read);
    Py_CLEAR(self->memo);
    Py_CLEAR(self->stack);
    Py_CLEAR(self->pers_func);
    Py_CLEAR(self->arg);
    Py_CLEAR(self->last_string);

    /* Each freed pointer is reset to NULL so that a later clear or dealloc
       does not free it twice. */
    PyMem_Free(self->marks);
    self->marks = NULL;
    free(self->encoding);
    self->encoding = NULL;
    free(self->errors);
    self->errors = NULL;

    return 0;
}
4464
4465PyDoc_STRVAR(Unpickler_doc,
4466"Unpickler(file, *, encoding='ASCII', errors='strict')"
4467"\n"
4468"This takes a binary file for reading a pickle data stream.\n"
4469"\n"
4470"The protocol version of the pickle is detected automatically, so no\n"
4471"proto argument is needed.\n"
4472"\n"
4473"The file-like object must have two methods, a read() method\n"
4474"that takes an integer argument, and a readline() method that\n"
4475"requires no arguments. Both methods should return bytes.\n"
4476"Thus file-like object can be a binary file object opened for\n"
4477"reading, a BytesIO object, or any other custom object that\n"
4478"meets this interface.\n"
4479"\n"
4480"Optional keyword arguments are encoding and errors, which are\n"
4481"used to decode 8-bit string instances pickled by Python 2.x.\n"
4482"These default to 'ASCII' and 'strict', respectively.\n");
4483
4484static int
4485Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
4486{
4487 static char *kwlist[] = {"file", "encoding", "errors", 0};
4488 PyObject *file;
4489 char *encoding = NULL;
4490 char *errors = NULL;
4491
4492 /* XXX: That is an horrible error message. But, I don't know how to do
4493 better... */
4494 if (Py_SIZE(args) != 1) {
4495 PyErr_Format(PyExc_TypeError,
4496 "%s takes exactly one positional argument (%zd given)",
4497 Py_TYPE(self)->tp_name, Py_SIZE(args));
4498 return -1;
4499 }
4500
4501 /* Arguments parsing needs to be done in the __init__() method to allow
4502 subclasses to define their own __init__() method, which may (or may
4503 not) support Unpickler arguments. However, this means we need to be
4504 extra careful in the other Unpickler methods, since a subclass could
4505 forget to call Unpickler.__init__() thus breaking our internal
4506 invariants. */
4507 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|ss:Unpickler", kwlist,
4508 &file, &encoding, &errors))
4509 return -1;
4510
4511 /* In case of multiple __init__() calls, clear previous content. */
4512 if (self->read != NULL)
4513 (void)Unpickler_clear(self);
4514
4515 self->read = PyObject_GetAttrString(file, "read");
4516 self->readline = PyObject_GetAttrString(file, "readline");
4517 if (self->readline == NULL || self->read == NULL)
4518 return -1;
4519
4520 if (encoding == NULL)
4521 encoding = "ASCII";
4522 if (errors == NULL)
4523 errors = "strict";
4524
4525 self->encoding = strdup(encoding);
4526 self->errors = strdup(errors);
4527 if (self->encoding == NULL || self->errors == NULL) {
4528 PyErr_NoMemory();
4529 return -1;
4530 }
4531
4532 if (PyObject_HasAttrString((PyObject *)self, "persistent_load")) {
4533 self->pers_func = PyObject_GetAttrString((PyObject *)self,
4534 "persistent_load");
4535 if (self->pers_func == NULL)
4536 return -1;
4537 }
4538 else {
4539 self->pers_func = NULL;
4540 }
4541
4542 self->stack = (Pdata *)Pdata_New();
4543 if (self->stack == NULL)
4544 return -1;
4545
4546 self->memo = PyDict_New();
4547 if (self->memo == NULL)
4548 return -1;
4549
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00004550 self->last_string = NULL;
4551 self->arg = NULL;
4552
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004553 return 0;
4554}
4555
4556static PyObject *
4557Unpickler_get_memo(UnpicklerObject *self)
4558{
4559 if (self->memo == NULL)
4560 PyErr_SetString(PyExc_AttributeError, "memo");
4561 else
4562 Py_INCREF(self->memo);
4563 return self->memo;
4564}
4565
4566static int
4567Unpickler_set_memo(UnpicklerObject *self, PyObject *value)
4568{
4569 PyObject *tmp;
4570
4571 if (value == NULL) {
4572 PyErr_SetString(PyExc_TypeError,
4573 "attribute deletion is not supported");
4574 return -1;
4575 }
4576 if (!PyDict_Check(value)) {
4577 PyErr_SetString(PyExc_TypeError, "memo must be a dictionary");
4578 return -1;
4579 }
4580
4581 tmp = self->memo;
4582 Py_INCREF(value);
4583 self->memo = value;
4584 Py_XDECREF(tmp);
4585
4586 return 0;
4587}
4588
4589static PyObject *
4590Unpickler_get_persload(UnpicklerObject *self)
4591{
4592 if (self->pers_func == NULL)
4593 PyErr_SetString(PyExc_AttributeError, "persistent_load");
4594 else
4595 Py_INCREF(self->pers_func);
4596 return self->pers_func;
4597}
4598
4599static int
4600Unpickler_set_persload(UnpicklerObject *self, PyObject *value)
4601{
4602 PyObject *tmp;
4603
4604 if (value == NULL) {
4605 PyErr_SetString(PyExc_TypeError,
4606 "attribute deletion is not supported");
4607 return -1;
4608 }
4609 if (!PyCallable_Check(value)) {
4610 PyErr_SetString(PyExc_TypeError,
4611 "persistent_load must be a callable taking "
4612 "one argument");
4613 return -1;
4614 }
4615
4616 tmp = self->pers_func;
4617 Py_INCREF(value);
4618 self->pers_func = value;
4619 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
4620
4621 return 0;
4622}
4623
4624static PyGetSetDef Unpickler_getsets[] = {
4625 {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
4626 {"persistent_load", (getter)Unpickler_get_persload,
4627 (setter)Unpickler_set_persload},
4628 {NULL}
4629};
4630
/* Type object for _pickle.Unpickler.
 *
 * The flags make the type subclassable (Py_TPFLAGS_BASETYPE) and a
 * participant in cyclic garbage collection (Py_TPFLAGS_HAVE_GC), which is
 * why tp_traverse/tp_clear are provided and instances are released with
 * PyObject_GC_Del.  Construction uses the generic alloc/new slots, with
 * Unpickler_init as tp_init.  Slots left at 0 are not customized here.
 */
static PyTypeObject Unpickler_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Unpickler",                /*tp_name*/
    sizeof(UnpicklerObject),            /*tp_basicsize*/
    0,                                  /*tp_itemsize*/
    (destructor)Unpickler_dealloc,      /*tp_dealloc*/
    0,                                  /*tp_print*/
    0,                                  /*tp_getattr*/
    0,                                  /*tp_setattr*/
    0,                                  /*tp_reserved*/
    0,                                  /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    0,                                  /*tp_as_mapping*/
    0,                                  /*tp_hash*/
    0,                                  /*tp_call*/
    0,                                  /*tp_str*/
    0,                                  /*tp_getattro*/
    0,                                  /*tp_setattro*/
    0,                                  /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
    Unpickler_doc,                      /*tp_doc*/
    (traverseproc)Unpickler_traverse,   /*tp_traverse*/
    (inquiry)Unpickler_clear,           /*tp_clear*/
    0,                                  /*tp_richcompare*/
    0,                                  /*tp_weaklistoffset*/
    0,                                  /*tp_iter*/
    0,                                  /*tp_iternext*/
    Unpickler_methods,                  /*tp_methods*/
    0,                                  /*tp_members*/
    Unpickler_getsets,                  /*tp_getset: memo, persistent_load*/
    0,                                  /*tp_base*/
    0,                                  /*tp_dict*/
    0,                                  /*tp_descr_get*/
    0,                                  /*tp_descr_set*/
    0,                                  /*tp_dictoffset*/
    (initproc)Unpickler_init,           /*tp_init*/
    PyType_GenericAlloc,                /*tp_alloc*/
    PyType_GenericNew,                  /*tp_new*/
    PyObject_GC_Del,                    /*tp_free*/
    0,                                  /*tp_is_gc*/
};
4673
4674static int
4675init_stuff(void)
4676{
4677 PyObject *copyreg;
4678
4679 copyreg = PyImport_ImportModule("copyreg");
4680 if (!copyreg)
4681 return -1;
4682
4683 dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
4684 if (!dispatch_table)
4685 goto error;
4686
4687 extension_registry = \
4688 PyObject_GetAttrString(copyreg, "_extension_registry");
4689 if (!extension_registry)
4690 goto error;
4691
4692 inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry");
4693 if (!inverted_registry)
4694 goto error;
4695
4696 extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
4697 if (!extension_cache)
4698 goto error;
4699
4700 Py_DECREF(copyreg);
4701
4702 empty_tuple = PyTuple_New(0);
4703 if (empty_tuple == NULL)
4704 return -1;
4705
4706 two_tuple = PyTuple_New(2);
4707 if (two_tuple == NULL)
4708 return -1;
4709 /* We use this temp container with no regard to refcounts, or to
4710 * keeping containees alive. Exempt from GC, because we don't
4711 * want anything looking at two_tuple() by magic.
4712 */
4713 PyObject_GC_UnTrack(two_tuple);
4714
4715 return 0;
4716
4717 error:
4718 Py_DECREF(copyreg);
4719 return -1;
4720}
4721
/* Module definition for the _pickle extension module.  No module-level
 * function table is supplied; the remaining optional slots are unused. */
static struct PyModuleDef _picklemodule = {
    PyModuleDef_HEAD_INIT,
    "_pickle",              /* m_name */
    pickle_module_doc,      /* m_doc */
    -1,                     /* m_size: no per-module state structure */
    NULL,                   /* m_methods */
    NULL,
    NULL,
    NULL,
    NULL
};
4733
4734PyMODINIT_FUNC
4735PyInit__pickle(void)
4736{
4737 PyObject *m;
4738
4739 if (PyType_Ready(&Unpickler_Type) < 0)
4740 return NULL;
4741 if (PyType_Ready(&Pickler_Type) < 0)
4742 return NULL;
4743 if (PyType_Ready(&Pdata_Type) < 0)
4744 return NULL;
4745
4746 /* Create the module and add the functions. */
4747 m = PyModule_Create(&_picklemodule);
4748 if (m == NULL)
4749 return NULL;
4750
4751 if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
4752 return NULL;
4753 if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
4754 return NULL;
4755
4756 /* Initialize the exceptions. */
4757 PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
4758 if (PickleError == NULL)
4759 return NULL;
4760 PicklingError = \
4761 PyErr_NewException("_pickle.PicklingError", PickleError, NULL);
4762 if (PicklingError == NULL)
4763 return NULL;
4764 UnpicklingError = \
4765 PyErr_NewException("_pickle.UnpicklingError", PickleError, NULL);
4766 if (UnpicklingError == NULL)
4767 return NULL;
4768
4769 if (PyModule_AddObject(m, "PickleError", PickleError) < 0)
4770 return NULL;
4771 if (PyModule_AddObject(m, "PicklingError", PicklingError) < 0)
4772 return NULL;
4773 if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0)
4774 return NULL;
4775
4776 if (init_stuff() < 0)
4777 return NULL;
4778
4779 return m;
4780}