blob: 29aed7adb3b8d8ea4fbefeb1900d094038d6d180 [file] [log] [blame]
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001#include "Python.h"
2#include "structmember.h"
3
4PyDoc_STRVAR(pickle_module_doc,
5"Optimized C implementation for the Python pickle module.");
6
/* Pickle protocol numbers known to this implementation.
   Bump HIGHEST_PROTOCOL when new opcodes are added to the pickle protocol. */
enum {
    DEFAULT_PROTOCOL = 3,
    HIGHEST_PROTOCOL = 3
};
12
13
/* Pickle opcodes.  Each opcode is a single byte, spelled here as a character
   constant.  This table must be kept in sync with pickle.py; extensive
   documentation for every opcode lives in pickletools.py. */
enum opcode {
    MARK            = '(',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',
    FLOAT           = 'F',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    LONG            = 'L',
    BININT2         = 'M',
    NONE            = 'N',
    PERSID          = 'P',
    BINPERSID       = 'Q',
    REDUCE          = 'R',
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',
    APPEND          = 'a',
    BUILD           = 'b',
    GLOBAL          = 'c',
    DICT            = 'd',
    EMPTY_DICT      = '}',
    APPENDS         = 'e',
    GET             = 'g',
    BINGET          = 'h',
    INST            = 'i',
    LONG_BINGET     = 'j',
    LIST            = 'l',
    EMPTY_LIST      = ']',
    OBJ             = 'o',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',
    SETITEM         = 's',
    TUPLE           = 't',
    EMPTY_TUPLE     = ')',
    SETITEMS        = 'u',
    BINFLOAT        = 'G',

    /* Opcodes introduced by protocol 2. */
    PROTO           = '\x80',
    NEWOBJ          = '\x81',
    EXT1            = '\x82',
    EXT2            = '\x83',
    EXT4            = '\x84',
    TUPLE1          = '\x85',
    TUPLE2          = '\x86',
    TUPLE3          = '\x87',
    NEWTRUE         = '\x88',
    NEWFALSE        = '\x89',
    LONG1           = '\x8a',
    LONG4           = '\x8b',

    /* Opcodes introduced by protocol 3 (Python 3.x). */
    BINBYTES        = 'B',
    SHORT_BINBYTES  = 'C',
};
77
/* TRUE and FALSE are not opcodes.  They are the complete text records used
 * to pickle bools before protocol 2: an INT opcode followed by "01" or "00".
 * Unpicklers written before bools existed load them as plain ints, while
 * newer unpicklers can recognize that a bool was intended.
 * Protocol 2 added direct ways to pickle bools.
 */
#undef TRUE
#define TRUE "I01\n"
#undef FALSE
#define FALSE "I00\n"
87
/* Tuning constants for the pickler. */
enum {
    /* Size of the Pickler's write buffer.  Higher values reduce the number
       of calls to the write() method of the output stream. */
    WRITE_BUF_SIZE = 256,

    /* Nesting depth at which a Pickler running in "fast mode" starts
       checking for self-referential data structures. */
    FAST_NESTING_LIMIT = 50,

    /* Number of elements batch_list/dict() pumps out before emitting
       APPENDS/SETITEMS.  Keep in sync with pickle.Pickler._BATCHSIZE.
       Nothing breaks if the two drift apart, but it is unclear that would
       help anything either. */
    BATCHSIZE = 1000,
};
103
104/* Exception classes for pickle. These should override the ones defined in
105 pickle.py, when the C-optimized Pickler and Unpickler are used. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000106static PyObject *PickleError = NULL;
107static PyObject *PicklingError = NULL;
108static PyObject *UnpicklingError = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000109
110/* copyreg.dispatch_table, {type_object: pickling_function} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000111static PyObject *dispatch_table = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000112/* For EXT[124] opcodes. */
113/* copyreg._extension_registry, {(module_name, function_name): code} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000114static PyObject *extension_registry = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000115/* copyreg._inverted_registry, {code: (module_name, function_name)} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000116static PyObject *inverted_registry = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000117/* copyreg._extension_cache, {code: object} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000118static PyObject *extension_cache = NULL;
119
120/* _compat_pickle.NAME_MAPPING, {(oldmodule, oldname): (newmodule, newname)} */
121static PyObject *name_mapping_2to3 = NULL;
122/* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
123static PyObject *import_mapping_2to3 = NULL;
124/* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
125static PyObject *name_mapping_3to2 = NULL;
126static PyObject *import_mapping_3to2 = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000127
128/* XXX: Are these really nescessary? */
129/* As the name says, an empty tuple. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000130static PyObject *empty_tuple = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000131/* For looking up name pairs in copyreg._extension_registry. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000132static PyObject *two_tuple = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000133
134static int
135stack_underflow(void)
136{
137 PyErr_SetString(UnpicklingError, "unpickling stack underflow");
138 return -1;
139}
140
141/* Internal data type used as the unpickling stack. */
142typedef struct {
143 PyObject_HEAD
144 int length; /* number of initial slots in data currently used */
145 int size; /* number of slots in data allocated */
146 PyObject **data;
147} Pdata;
148
149static void
150Pdata_dealloc(Pdata *self)
151{
152 int i;
153 PyObject **p;
154
155 for (i = self->length, p = self->data; --i >= 0; p++) {
156 Py_DECREF(*p);
157 }
158 if (self->data)
159 PyMem_Free(self->data);
160 PyObject_Del(self);
161}
162
163static PyTypeObject Pdata_Type = {
164 PyVarObject_HEAD_INIT(NULL, 0)
165 "_pickle.Pdata", /*tp_name*/
166 sizeof(Pdata), /*tp_basicsize*/
167 0, /*tp_itemsize*/
168 (destructor)Pdata_dealloc, /*tp_dealloc*/
169};
170
171static PyObject *
172Pdata_New(void)
173{
174 Pdata *self;
175
176 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
177 return NULL;
178 self->size = 8;
179 self->length = 0;
180 self->data = PyMem_Malloc(self->size * sizeof(PyObject *));
181 if (self->data)
182 return (PyObject *)self;
183 Py_DECREF(self);
184 return PyErr_NoMemory();
185}
186
187
188/* Retain only the initial clearto items. If clearto >= the current
189 * number of items, this is a (non-erroneous) NOP.
190 */
191static int
192Pdata_clear(Pdata *self, int clearto)
193{
194 int i;
195 PyObject **p;
196
197 if (clearto < 0)
198 return stack_underflow();
199 if (clearto >= self->length)
200 return 0;
201
202 for (i = self->length, p = self->data + clearto; --i >= clearto; p++) {
203 Py_CLEAR(*p);
204 }
205 self->length = clearto;
206
207 return 0;
208}
209
210static int
211Pdata_grow(Pdata *self)
212{
213 int bigger;
214 size_t nbytes;
215 PyObject **tmp;
216
217 bigger = (self->size << 1) + 1;
218 if (bigger <= 0) /* was 0, or new value overflows */
219 goto nomemory;
220 if ((int)(size_t)bigger != bigger)
221 goto nomemory;
222 nbytes = (size_t)bigger * sizeof(PyObject *);
223 if (nbytes / sizeof(PyObject *) != (size_t)bigger)
224 goto nomemory;
225 tmp = PyMem_Realloc(self->data, nbytes);
226 if (tmp == NULL)
227 goto nomemory;
228 self->data = tmp;
229 self->size = bigger;
230 return 0;
231
232 nomemory:
233 PyErr_NoMemory();
234 return -1;
235}
236
237/* D is a Pdata*. Pop the topmost element and store it into V, which
238 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
239 * is raised and V is set to NULL.
240 */
241static PyObject *
242Pdata_pop(Pdata *self)
243{
244 if (self->length == 0) {
245 PyErr_SetString(UnpicklingError, "bad pickle data");
246 return NULL;
247 }
248 return self->data[--(self->length)];
249}
250#define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
251
252static int
253Pdata_push(Pdata *self, PyObject *obj)
254{
255 if (self->length == self->size && Pdata_grow(self) < 0) {
256 return -1;
257 }
258 self->data[self->length++] = obj;
259 return 0;
260}
261
/* Push an object on stack, transferring its ownership to the stack.
   If the push fails, the macro returns ER from the *enclosing* function.
   NOTE(review): on failure the reference to O is neither stored nor
   released here -- confirm how callers expect it to be handled. */
#define PDATA_PUSH(D, O, ER) do {                               \
        if (Pdata_push((D), (O)) < 0) return (ER); } while(0)

/* Push an object on stack, adding a new reference to the object.
   If the push fails, the reference taken here is released again before
   returning ER from the *enclosing* function, so nothing is leaked.
   O is evaluated more than once; pass a side-effect-free expression. */
#define PDATA_APPEND(D, O, ER) do {                             \
        Py_INCREF((O));                                         \
        if (Pdata_push((D), (O)) < 0) {                         \
            Py_DECREF((O));                                     \
            return (ER);                                        \
        }                                                       \
    } while(0)
270
271static PyObject *
272Pdata_poptuple(Pdata *self, Py_ssize_t start)
273{
274 PyObject *tuple;
275 Py_ssize_t len, i, j;
276
277 len = self->length - start;
278 tuple = PyTuple_New(len);
279 if (tuple == NULL)
280 return NULL;
281 for (i = start, j = 0; j < len; i++, j++)
282 PyTuple_SET_ITEM(tuple, j, self->data[i]);
283
284 self->length = start;
285 return tuple;
286}
287
288static PyObject *
289Pdata_poplist(Pdata *self, Py_ssize_t start)
290{
291 PyObject *list;
292 Py_ssize_t len, i, j;
293
294 len = self->length - start;
295 list = PyList_New(len);
296 if (list == NULL)
297 return NULL;
298 for (i = start, j = 0; j < len; i++, j++)
299 PyList_SET_ITEM(list, j, self->data[i]);
300
301 self->length = start;
302 return list;
303}
304
305typedef struct PicklerObject {
306 PyObject_HEAD
307 PyObject *write; /* write() method of the output stream */
308 PyObject *memo; /* Memo dictionary, keep track of the seen
309 objects to support self-referential objects
310 pickling. */
311 PyObject *pers_func; /* persistent_id() method, can be NULL */
312 PyObject *arg;
313 int proto; /* Pickle protocol number, >= 0 */
314 int bin; /* Boolean, true if proto > 0 */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000315 int buf_size; /* Size of the current buffered pickle data */
316 char *write_buf; /* Write buffer, this is to avoid calling the
317 write() method of the output stream too
318 often. */
319 int fast; /* Enable fast mode if set to a true value.
320 The fast mode disable the usage of memo,
321 therefore speeding the pickling process by
322 not generating superfluous PUT opcodes. It
323 should not be used if with self-referential
324 objects. */
325 int fast_nesting;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000326 int fix_imports; /* Indicate whether Pickler should fix
327 the name of globals for Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000328 PyObject *fast_memo;
329} PicklerObject;
330
331typedef struct UnpicklerObject {
332 PyObject_HEAD
333 Pdata *stack; /* Pickle data stack, store unpickled objects. */
334 PyObject *readline; /* readline() method of the output stream */
335 PyObject *read; /* read() method of the output stream */
336 PyObject *memo; /* Memo dictionary, provide the objects stored
337 using the PUT opcodes. */
338 PyObject *arg;
339 PyObject *pers_func; /* persistent_load() method, can be NULL. */
340 PyObject *last_string; /* Reference to the last string read by the
341 readline() method. */
342 char *buffer; /* Reading buffer. */
343 char *encoding; /* Name of the encoding to be used for
344 decoding strings pickled using Python
345 2.x. The default value is "ASCII" */
346 char *errors; /* Name of errors handling scheme to used when
347 decoding strings. The default value is
348 "strict". */
349 int *marks; /* Mark stack, used for unpickling container
350 objects. */
351 Py_ssize_t num_marks; /* Number of marks in the mark stack. */
352 Py_ssize_t marks_size; /* Current allocated size of the mark stack. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000353 int proto; /* Protocol of the pickle loaded. */
354 int fix_imports; /* Indicate whether Unpickler should fix
355 the name of globals pickled by Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000356} UnpicklerObject;
357
358/* Forward declarations */
359static int save(PicklerObject *, PyObject *, int);
360static int save_reduce(PicklerObject *, PyObject *, PyObject *);
361static PyTypeObject Pickler_Type;
362static PyTypeObject Unpickler_Type;
363
364
/* Helpers for creating the argument tuple passed to functions.  The 1-tuple
   is cached in self->arg, which has the performance advantage of calling
   PyTuple_New() only once.

   ARG_TUP(self, obj) stores obj in slot 0 of the cached tuple (creating the
   tuple on first use) and releases whatever was in the slot before.  The
   reference to obj is always consumed: it is either moved into the tuple or,
   if the tuple could not be created, released.  In the latter case
   self->arg is left NULL, which callers must check. */

#define ARG_TUP(self, obj) do {                                 \
        if ((self)->arg || ((self)->arg = PyTuple_New(1))) {    \
            Py_XDECREF(PyTuple_GET_ITEM((self)->arg, 0));       \
            PyTuple_SET_ITEM((self)->arg, 0, (obj));            \
        }                                                       \
        else {                                                  \
            Py_DECREF((obj));                                   \
        }                                                       \
    } while (0)

/* FREE_ARG_TUP(self) drops the cached tuple when something else still holds
   a reference to it after the call, so it is not reused while shared.
   self->arg must not be NULL. */
#define FREE_ARG_TUP(self) do {                                 \
        if ((self)->arg->ob_refcnt > 1)                         \
            Py_CLEAR((self)->arg);                              \
    } while (0)
382
383/* A temporary cleaner API for fast single argument function call.
384
385 XXX: Does caching the argument tuple provides any real performance benefits?
386
387 A quick benchmark, on a 2.0GHz Athlon64 3200+ running Linux 2.6.24 with
388 glibc 2.7, tells me that it takes roughly 20,000,000 PyTuple_New(1) calls
389 when the tuple is retrieved from the freelist (i.e, call PyTuple_New() then
390 immediately DECREF it) and 1,200,000 calls when allocating brand new tuples
391 (i.e, call PyTuple_New() and store the returned value in an array), to save
392 one second (wall clock time). Either ways, the loading time a pickle stream
393 large enough to generate this number of calls would be massively
394 overwhelmed by other factors, like I/O throughput, the GC traversal and
395 object allocation overhead. So, I really doubt these functions provide any
396 real benefits.
397
398 On the other hand, oprofile reports that pickle spends a lot of time in
399 these functions. But, that is probably more related to the function call
400 overhead, than the argument tuple allocation.
401
402 XXX: And, what is the reference behavior of these? Steal, borrow? At first
403 glance, it seems to steal the reference of 'arg' and borrow the reference
404 of 'func'.
405 */
406static PyObject *
407pickler_call(PicklerObject *self, PyObject *func, PyObject *arg)
408{
409 PyObject *result = NULL;
410
411 ARG_TUP(self, arg);
412 if (self->arg) {
413 result = PyObject_Call(func, self->arg, NULL);
414 FREE_ARG_TUP(self);
415 }
416 return result;
417}
418
419static PyObject *
420unpickler_call(UnpicklerObject *self, PyObject *func, PyObject *arg)
421{
422 PyObject *result = NULL;
423
424 ARG_TUP(self, arg);
425 if (self->arg) {
426 result = PyObject_Call(func, self->arg, NULL);
427 FREE_ARG_TUP(self);
428 }
429 return result;
430}
431
432static Py_ssize_t
433pickler_write(PicklerObject *self, const char *s, Py_ssize_t n)
434{
435 PyObject *data, *result;
436
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000437 if (self->write_buf == NULL) {
438 PyErr_SetString(PyExc_SystemError, "invalid write buffer");
439 return -1;
440 }
441
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000442 if (s == NULL) {
443 if (!(self->buf_size))
444 return 0;
445 data = PyBytes_FromStringAndSize(self->write_buf, self->buf_size);
446 if (data == NULL)
447 return -1;
448 }
449 else {
450 if (self->buf_size && (n + self->buf_size) > WRITE_BUF_SIZE) {
451 if (pickler_write(self, NULL, 0) < 0)
452 return -1;
453 }
454
455 if (n > WRITE_BUF_SIZE) {
456 if (!(data = PyBytes_FromStringAndSize(s, n)))
457 return -1;
458 }
459 else {
460 memcpy(self->write_buf + self->buf_size, s, n);
461 self->buf_size += n;
462 return n;
463 }
464 }
465
466 /* object with write method */
467 result = pickler_call(self, self->write, data);
468 if (result == NULL)
469 return -1;
470
471 Py_DECREF(result);
472 self->buf_size = 0;
473 return n;
474}
475
476/* XXX: These read/readline functions ought to be optimized. Buffered I/O
477 might help a lot, especially with the new (but much slower) io library.
478 On the other hand, the added complexity might not worth it.
479 */
480
481/* Read at least n characters from the input stream and set s to the current
482 reading position. */
483static Py_ssize_t
484unpickler_read(UnpicklerObject *self, char **s, Py_ssize_t n)
485{
486 PyObject *len;
487 PyObject *data;
488
489 len = PyLong_FromSsize_t(n);
490 if (len == NULL)
491 return -1;
492
493 data = unpickler_call(self, self->read, len);
494 if (data == NULL)
495 return -1;
496
497 /* XXX: Should bytearray be supported too? */
498 if (!PyBytes_Check(data)) {
499 PyErr_SetString(PyExc_ValueError,
500 "read() from the underlying stream did not"
501 "return bytes");
Amaury Forgeot d'Arc5f952572008-11-25 21:11:54 +0000502 Py_DECREF(data);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000503 return -1;
504 }
505
Amaury Forgeot d'Arc3e4e72f2008-11-11 20:05:06 +0000506 if (PyBytes_GET_SIZE(data) != n) {
507 PyErr_SetNone(PyExc_EOFError);
Amaury Forgeot d'Arc5f952572008-11-25 21:11:54 +0000508 Py_DECREF(data);
Amaury Forgeot d'Arc3e4e72f2008-11-11 20:05:06 +0000509 return -1;
510 }
511
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000512 Py_XDECREF(self->last_string);
513 self->last_string = data;
514
515 if (!(*s = PyBytes_AS_STRING(data)))
516 return -1;
517
518 return n;
519}
520
521static Py_ssize_t
522unpickler_readline(UnpicklerObject *self, char **s)
523{
524 PyObject *data;
525
526 data = PyObject_CallObject(self->readline, empty_tuple);
527 if (data == NULL)
528 return -1;
529
530 /* XXX: Should bytearray be supported too? */
531 if (!PyBytes_Check(data)) {
532 PyErr_SetString(PyExc_ValueError,
533 "readline() from the underlying stream did not"
534 "return bytes");
535 return -1;
536 }
537
538 Py_XDECREF(self->last_string);
539 self->last_string = data;
540
541 if (!(*s = PyBytes_AS_STRING(data)))
542 return -1;
543
544 return PyBytes_GET_SIZE(data);
545}
546
547/* Generate a GET opcode for an object stored in the memo. The 'key' argument
548 should be the address of the object as returned by PyLong_FromVoidPtr(). */
549static int
550memo_get(PicklerObject *self, PyObject *key)
551{
552 PyObject *value;
553 PyObject *memo_id;
554 long x;
555 char pdata[30];
556 int len;
557
558 value = PyDict_GetItemWithError(self->memo, key);
559 if (value == NULL) {
560 if (!PyErr_Occurred())
561 PyErr_SetObject(PyExc_KeyError, key);
562 return -1;
563 }
564
565 memo_id = PyTuple_GetItem(value, 0);
566 if (memo_id == NULL)
567 return -1;
568
569 if (!PyLong_Check(memo_id)) {
570 PyErr_SetString(PicklingError, "memo id must be an integer");
571 return -1;
572 }
573 x = PyLong_AsLong(memo_id);
574 if (x == -1 && PyErr_Occurred())
575 return -1;
576
577 if (!self->bin) {
578 pdata[0] = GET;
579 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
580 len = (int)strlen(pdata);
581 }
582 else {
583 if (x < 256) {
584 pdata[0] = BINGET;
585 pdata[1] = (unsigned char)(x & 0xff);
586 len = 2;
587 }
588 else if (x <= 0xffffffffL) {
589 pdata[0] = LONG_BINGET;
590 pdata[1] = (unsigned char)(x & 0xff);
591 pdata[2] = (unsigned char)((x >> 8) & 0xff);
592 pdata[3] = (unsigned char)((x >> 16) & 0xff);
593 pdata[4] = (unsigned char)((x >> 24) & 0xff);
594 len = 5;
595 }
596 else { /* unlikely */
597 PyErr_SetString(PicklingError,
598 "memo id too large for LONG_BINGET");
599 return -1;
600 }
601 }
602
603 if (pickler_write(self, pdata, len) < 0)
604 return -1;
605
606 return 0;
607}
608
609/* Store an object in the memo, assign it a new unique ID based on the number
610 of objects currently stored in the memo and generate a PUT opcode. */
611static int
612memo_put(PicklerObject *self, PyObject *obj)
613{
614 PyObject *key = NULL;
615 PyObject *memo_id = NULL;
616 PyObject *tuple = NULL;
617 long x;
618 char pdata[30];
619 int len;
620 int status = 0;
621
622 if (self->fast)
623 return 0;
624
625 key = PyLong_FromVoidPtr(obj);
626 if (key == NULL)
627 goto error;
628 if ((x = PyDict_Size(self->memo)) < 0)
629 goto error;
630 memo_id = PyLong_FromLong(x);
631 if (memo_id == NULL)
632 goto error;
633 tuple = PyTuple_New(2);
634 if (tuple == NULL)
635 goto error;
636
637 Py_INCREF(memo_id);
638 PyTuple_SET_ITEM(tuple, 0, memo_id);
639 Py_INCREF(obj);
640 PyTuple_SET_ITEM(tuple, 1, obj);
641 if (PyDict_SetItem(self->memo, key, tuple) < 0)
642 goto error;
643
644 if (!self->bin) {
645 pdata[0] = PUT;
646 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
647 len = strlen(pdata);
648 }
649 else {
650 if (x < 256) {
651 pdata[0] = BINPUT;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +0000652 pdata[1] = (unsigned char)x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000653 len = 2;
654 }
655 else if (x <= 0xffffffffL) {
656 pdata[0] = LONG_BINPUT;
657 pdata[1] = (unsigned char)(x & 0xff);
658 pdata[2] = (unsigned char)((x >> 8) & 0xff);
659 pdata[3] = (unsigned char)((x >> 16) & 0xff);
660 pdata[4] = (unsigned char)((x >> 24) & 0xff);
661 len = 5;
662 }
663 else { /* unlikely */
664 PyErr_SetString(PicklingError,
665 "memo id too large for LONG_BINPUT");
666 return -1;
667 }
668 }
669
670 if (pickler_write(self, pdata, len) < 0)
671 goto error;
672
673 if (0) {
674 error:
675 status = -1;
676 }
677
678 Py_XDECREF(key);
679 Py_XDECREF(memo_id);
680 Py_XDECREF(tuple);
681
682 return status;
683}
684
685static PyObject *
686whichmodule(PyObject *global, PyObject *global_name)
687{
688 Py_ssize_t i, j;
689 static PyObject *module_str = NULL;
690 static PyObject *main_str = NULL;
691 PyObject *module_name;
692 PyObject *modules_dict;
693 PyObject *module;
694 PyObject *obj;
695
696 if (module_str == NULL) {
697 module_str = PyUnicode_InternFromString("__module__");
698 if (module_str == NULL)
699 return NULL;
700 main_str = PyUnicode_InternFromString("__main__");
701 if (main_str == NULL)
702 return NULL;
703 }
704
705 module_name = PyObject_GetAttr(global, module_str);
706
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +0000707 /* In some rare cases (e.g., bound methods of extension types),
708 __module__ can be None. If it is so, then search sys.modules
709 for the module of global. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000710 if (module_name == Py_None) {
711 Py_DECREF(module_name);
712 goto search;
713 }
714
715 if (module_name) {
716 return module_name;
717 }
718 if (PyErr_ExceptionMatches(PyExc_AttributeError))
719 PyErr_Clear();
720 else
721 return NULL;
722
723 search:
724 modules_dict = PySys_GetObject("modules");
725 if (modules_dict == NULL)
726 return NULL;
727
728 i = 0;
729 module_name = NULL;
730 while ((j = PyDict_Next(modules_dict, &i, &module_name, &module))) {
Mark Dickinson211c6252009-02-01 10:28:51 +0000731 if (PyObject_RichCompareBool(module_name, main_str, Py_EQ) == 1)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000732 continue;
733
734 obj = PyObject_GetAttr(module, global_name);
735 if (obj == NULL) {
736 if (PyErr_ExceptionMatches(PyExc_AttributeError))
737 PyErr_Clear();
738 else
739 return NULL;
740 continue;
741 }
742
743 if (obj != global) {
744 Py_DECREF(obj);
745 continue;
746 }
747
748 Py_DECREF(obj);
749 break;
750 }
751
752 /* If no module is found, use __main__. */
753 if (!j) {
754 module_name = main_str;
755 }
756
757 Py_INCREF(module_name);
758 return module_name;
759}
760
761/* fast_save_enter() and fast_save_leave() are guards against recursive
762 objects when Pickler is used with the "fast mode" (i.e., with object
763 memoization disabled). If the nesting of a list or dict object exceed
764 FAST_NESTING_LIMIT, these guards will start keeping an internal
765 reference to the seen list or dict objects and check whether these objects
766 are recursive. These are not strictly necessary, since save() has a
767 hard-coded recursion limit, but they give a nicer error message than the
768 typical RuntimeError. */
769static int
770fast_save_enter(PicklerObject *self, PyObject *obj)
771{
772 /* if fast_nesting < 0, we're doing an error exit. */
773 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
774 PyObject *key = NULL;
775 if (self->fast_memo == NULL) {
776 self->fast_memo = PyDict_New();
777 if (self->fast_memo == NULL) {
778 self->fast_nesting = -1;
779 return 0;
780 }
781 }
782 key = PyLong_FromVoidPtr(obj);
783 if (key == NULL)
784 return 0;
785 if (PyDict_GetItem(self->fast_memo, key)) {
786 Py_DECREF(key);
787 PyErr_Format(PyExc_ValueError,
788 "fast mode: can't pickle cyclic objects "
789 "including object type %.200s at %p",
790 obj->ob_type->tp_name, obj);
791 self->fast_nesting = -1;
792 return 0;
793 }
794 if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
795 Py_DECREF(key);
796 self->fast_nesting = -1;
797 return 0;
798 }
799 Py_DECREF(key);
800 }
801 return 1;
802}
803
804static int
805fast_save_leave(PicklerObject *self, PyObject *obj)
806{
807 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
808 PyObject *key = PyLong_FromVoidPtr(obj);
809 if (key == NULL)
810 return 0;
811 if (PyDict_DelItem(self->fast_memo, key) < 0) {
812 Py_DECREF(key);
813 return 0;
814 }
815 Py_DECREF(key);
816 }
817 return 1;
818}
819
820static int
821save_none(PicklerObject *self, PyObject *obj)
822{
823 const char none_op = NONE;
824 if (pickler_write(self, &none_op, 1) < 0)
825 return -1;
826
827 return 0;
828}
829
830static int
831save_bool(PicklerObject *self, PyObject *obj)
832{
833 static const char *buf[2] = { FALSE, TRUE };
834 const char len[2] = {sizeof(FALSE) - 1, sizeof(TRUE) - 1};
835 int p = (obj == Py_True);
836
837 if (self->proto >= 2) {
838 const char bool_op = p ? NEWTRUE : NEWFALSE;
839 if (pickler_write(self, &bool_op, 1) < 0)
840 return -1;
841 }
842 else if (pickler_write(self, buf[p], len[p]) < 0)
843 return -1;
844
845 return 0;
846}
847
848static int
849save_int(PicklerObject *self, long x)
850{
851 char pdata[32];
852 int len = 0;
853
854 if (!self->bin
855#if SIZEOF_LONG > 4
856 || x > 0x7fffffffL || x < -0x80000000L
857#endif
858 ) {
859 /* Text-mode pickle, or long too big to fit in the 4-byte
860 * signed BININT format: store as a string.
861 */
Mark Dickinson8dd05142009-01-20 20:43:58 +0000862 pdata[0] = LONG; /* use LONG for consistency with pickle.py */
863 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ldL\n", x);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000864 if (pickler_write(self, pdata, strlen(pdata)) < 0)
865 return -1;
866 }
867 else {
868 /* Binary pickle and x fits in a signed 4-byte int. */
869 pdata[1] = (unsigned char)(x & 0xff);
870 pdata[2] = (unsigned char)((x >> 8) & 0xff);
871 pdata[3] = (unsigned char)((x >> 16) & 0xff);
872 pdata[4] = (unsigned char)((x >> 24) & 0xff);
873
874 if ((pdata[4] == 0) && (pdata[3] == 0)) {
875 if (pdata[2] == 0) {
876 pdata[0] = BININT1;
877 len = 2;
878 }
879 else {
880 pdata[0] = BININT2;
881 len = 3;
882 }
883 }
884 else {
885 pdata[0] = BININT;
886 len = 5;
887 }
888
889 if (pickler_write(self, pdata, len) < 0)
890 return -1;
891 }
892
893 return 0;
894}
895
896static int
897save_long(PicklerObject *self, PyObject *obj)
898{
899 PyObject *repr = NULL;
900 Py_ssize_t size;
901 long val = PyLong_AsLong(obj);
902 int status = 0;
903
904 const char long_op = LONG;
905
906 if (val == -1 && PyErr_Occurred()) {
907 /* out of range for int pickling */
908 PyErr_Clear();
909 }
910 else
911 return save_int(self, val);
912
913 if (self->proto >= 2) {
914 /* Linear-time pickling. */
915 size_t nbits;
916 size_t nbytes;
917 unsigned char *pdata;
918 char header[5];
919 int i;
920 int sign = _PyLong_Sign(obj);
921
922 if (sign == 0) {
923 header[0] = LONG1;
924 header[1] = 0; /* It's 0 -- an empty bytestring. */
925 if (pickler_write(self, header, 2) < 0)
926 goto error;
927 return 0;
928 }
929 nbits = _PyLong_NumBits(obj);
930 if (nbits == (size_t)-1 && PyErr_Occurred())
931 goto error;
932 /* How many bytes do we need? There are nbits >> 3 full
933 * bytes of data, and nbits & 7 leftover bits. If there
934 * are any leftover bits, then we clearly need another
935 * byte. Wnat's not so obvious is that we *probably*
936 * need another byte even if there aren't any leftovers:
937 * the most-significant bit of the most-significant byte
938 * acts like a sign bit, and it's usually got a sense
939 * opposite of the one we need. The exception is longs
940 * of the form -(2**(8*j-1)) for j > 0. Such a long is
941 * its own 256's-complement, so has the right sign bit
942 * even without the extra byte. That's a pain to check
943 * for in advance, though, so we always grab an extra
944 * byte at the start, and cut it back later if possible.
945 */
946 nbytes = (nbits >> 3) + 1;
947 if (nbytes > INT_MAX) {
948 PyErr_SetString(PyExc_OverflowError,
949 "long too large to pickle");
950 goto error;
951 }
Neal Norwitz6ae2eb22008-08-24 23:50:08 +0000952 repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000953 if (repr == NULL)
954 goto error;
Neal Norwitz6ae2eb22008-08-24 23:50:08 +0000955 pdata = (unsigned char *)PyBytes_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000956 i = _PyLong_AsByteArray((PyLongObject *)obj,
957 pdata, nbytes,
958 1 /* little endian */ , 1 /* signed */ );
959 if (i < 0)
960 goto error;
961 /* If the long is negative, this may be a byte more than
962 * needed. This is so iff the MSB is all redundant sign
963 * bits.
964 */
965 if (sign < 0 &&
966 nbytes > 1 &&
967 pdata[nbytes - 1] == 0xff &&
968 (pdata[nbytes - 2] & 0x80) != 0) {
969 nbytes--;
970 }
971
972 if (nbytes < 256) {
973 header[0] = LONG1;
974 header[1] = (unsigned char)nbytes;
975 size = 2;
976 }
977 else {
978 header[0] = LONG4;
979 size = (int)nbytes;
980 for (i = 1; i < 5; i++) {
981 header[i] = (unsigned char)(size & 0xff);
982 size >>= 8;
983 }
984 size = 5;
985 }
986 if (pickler_write(self, header, size) < 0 ||
987 pickler_write(self, (char *)pdata, (int)nbytes) < 0)
988 goto error;
989 }
990 else {
991 char *string;
992
Mark Dickinson8dd05142009-01-20 20:43:58 +0000993 /* proto < 2: write the repr and newline. This is quadratic-time (in
994 the number of digits), in both directions. We add a trailing 'L'
995 to the repr, for compatibility with Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000996
997 repr = PyObject_Repr(obj);
998 if (repr == NULL)
999 goto error;
1000
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00001001 string = _PyUnicode_AsStringAndSize(repr, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001002 if (string == NULL)
1003 goto error;
1004
1005 if (pickler_write(self, &long_op, 1) < 0 ||
1006 pickler_write(self, string, size) < 0 ||
Mark Dickinson8dd05142009-01-20 20:43:58 +00001007 pickler_write(self, "L\n", 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001008 goto error;
1009 }
1010
1011 if (0) {
1012 error:
1013 status = -1;
1014 }
1015 Py_XDECREF(repr);
1016
1017 return status;
1018}
1019
1020static int
1021save_float(PicklerObject *self, PyObject *obj)
1022{
1023 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
1024
1025 if (self->bin) {
1026 char pdata[9];
1027 pdata[0] = BINFLOAT;
1028 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
1029 return -1;
1030 if (pickler_write(self, pdata, 9) < 0)
1031 return -1;
Eric Smith0923d1d2009-04-16 20:16:10 +00001032 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001033 else {
Eric Smith0923d1d2009-04-16 20:16:10 +00001034 int result = -1;
1035 char *buf = NULL;
1036 char op = FLOAT;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001037
Eric Smith0923d1d2009-04-16 20:16:10 +00001038 if (pickler_write(self, &op, 1) < 0)
1039 goto done;
1040
Mark Dickinson3e09f432009-04-17 08:41:23 +00001041 buf = PyOS_double_to_string(x, 'g', 17, 0, NULL);
Eric Smith0923d1d2009-04-16 20:16:10 +00001042 if (!buf) {
1043 PyErr_NoMemory();
1044 goto done;
1045 }
1046
1047 if (pickler_write(self, buf, strlen(buf)) < 0)
1048 goto done;
1049
1050 if (pickler_write(self, "\n", 1) < 0)
1051 goto done;
1052
1053 result = 0;
1054done:
1055 PyMem_Free(buf);
1056 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001057 }
1058
1059 return 0;
1060}
1061
1062static int
1063save_bytes(PicklerObject *self, PyObject *obj)
1064{
1065 if (self->proto < 3) {
1066 /* Older pickle protocols do not have an opcode for pickling bytes
1067 objects. Therefore, we need to fake the copy protocol (i.e.,
1068 the __reduce__ method) to permit bytes object unpickling. */
1069 PyObject *reduce_value = NULL;
1070 PyObject *bytelist = NULL;
1071 int status;
1072
1073 bytelist = PySequence_List(obj);
1074 if (bytelist == NULL)
1075 return -1;
1076
1077 reduce_value = Py_BuildValue("(O(O))", (PyObject *)&PyBytes_Type,
1078 bytelist);
1079 if (reduce_value == NULL) {
1080 Py_DECREF(bytelist);
1081 return -1;
1082 }
1083
1084 /* save_reduce() will memoize the object automatically. */
1085 status = save_reduce(self, reduce_value, obj);
1086 Py_DECREF(reduce_value);
1087 Py_DECREF(bytelist);
1088 return status;
1089 }
1090 else {
1091 Py_ssize_t size;
1092 char header[5];
1093 int len;
1094
1095 size = PyBytes_Size(obj);
1096 if (size < 0)
1097 return -1;
1098
1099 if (size < 256) {
1100 header[0] = SHORT_BINBYTES;
1101 header[1] = (unsigned char)size;
1102 len = 2;
1103 }
1104 else if (size <= 0xffffffffL) {
1105 header[0] = BINBYTES;
1106 header[1] = (unsigned char)(size & 0xff);
1107 header[2] = (unsigned char)((size >> 8) & 0xff);
1108 header[3] = (unsigned char)((size >> 16) & 0xff);
1109 header[4] = (unsigned char)((size >> 24) & 0xff);
1110 len = 5;
1111 }
1112 else {
1113 return -1; /* string too large */
1114 }
1115
1116 if (pickler_write(self, header, len) < 0)
1117 return -1;
1118
1119 if (pickler_write(self, PyBytes_AS_STRING(obj), size) < 0)
1120 return -1;
1121
1122 if (memo_put(self, obj) < 0)
1123 return -1;
1124
1125 return 0;
1126 }
1127}
1128
1129/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
1130 backslash and newline characters to \uXXXX escapes. */
1131static PyObject *
1132raw_unicode_escape(const Py_UNICODE *s, Py_ssize_t size)
1133{
1134 PyObject *repr, *result;
1135 char *p;
1136 char *q;
1137
1138 static const char *hexdigits = "0123456789abcdef";
1139
1140#ifdef Py_UNICODE_WIDE
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001141 const Py_ssize_t expandsize = 10;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001142#else
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001143 const Py_ssize_t expandsize = 6;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001144#endif
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001145
1146 if (size > PY_SSIZE_T_MAX / expandsize)
1147 return PyErr_NoMemory();
1148
1149 repr = PyByteArray_FromStringAndSize(NULL, expandsize * size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001150 if (repr == NULL)
1151 return NULL;
1152 if (size == 0)
1153 goto done;
1154
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001155 p = q = PyByteArray_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001156 while (size-- > 0) {
1157 Py_UNICODE ch = *s++;
1158#ifdef Py_UNICODE_WIDE
1159 /* Map 32-bit characters to '\Uxxxxxxxx' */
1160 if (ch >= 0x10000) {
1161 *p++ = '\\';
1162 *p++ = 'U';
1163 *p++ = hexdigits[(ch >> 28) & 0xf];
1164 *p++ = hexdigits[(ch >> 24) & 0xf];
1165 *p++ = hexdigits[(ch >> 20) & 0xf];
1166 *p++ = hexdigits[(ch >> 16) & 0xf];
1167 *p++ = hexdigits[(ch >> 12) & 0xf];
1168 *p++ = hexdigits[(ch >> 8) & 0xf];
1169 *p++ = hexdigits[(ch >> 4) & 0xf];
1170 *p++ = hexdigits[ch & 15];
1171 }
1172 else
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001173#else
1174 /* Map UTF-16 surrogate pairs to '\U00xxxxxx' */
1175 if (ch >= 0xD800 && ch < 0xDC00) {
1176 Py_UNICODE ch2;
1177 Py_UCS4 ucs;
1178
1179 ch2 = *s++;
1180 size--;
1181 if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
1182 ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
1183 *p++ = '\\';
1184 *p++ = 'U';
1185 *p++ = hexdigits[(ucs >> 28) & 0xf];
1186 *p++ = hexdigits[(ucs >> 24) & 0xf];
1187 *p++ = hexdigits[(ucs >> 20) & 0xf];
1188 *p++ = hexdigits[(ucs >> 16) & 0xf];
1189 *p++ = hexdigits[(ucs >> 12) & 0xf];
1190 *p++ = hexdigits[(ucs >> 8) & 0xf];
1191 *p++ = hexdigits[(ucs >> 4) & 0xf];
1192 *p++ = hexdigits[ucs & 0xf];
1193 continue;
1194 }
1195 /* Fall through: isolated surrogates are copied as-is */
1196 s--;
1197 size++;
1198 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001199#endif
1200 /* Map 16-bit characters to '\uxxxx' */
1201 if (ch >= 256 || ch == '\\' || ch == '\n') {
1202 *p++ = '\\';
1203 *p++ = 'u';
1204 *p++ = hexdigits[(ch >> 12) & 0xf];
1205 *p++ = hexdigits[(ch >> 8) & 0xf];
1206 *p++ = hexdigits[(ch >> 4) & 0xf];
1207 *p++ = hexdigits[ch & 15];
1208 }
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001209 /* Copy everything else as-is */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001210 else
1211 *p++ = (char) ch;
1212 }
1213 size = p - q;
1214
1215 done:
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001216 result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(repr), size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001217 Py_DECREF(repr);
1218 return result;
1219}
1220
1221static int
1222save_unicode(PicklerObject *self, PyObject *obj)
1223{
1224 Py_ssize_t size;
1225 PyObject *encoded = NULL;
1226
1227 if (self->bin) {
1228 char pdata[5];
1229
1230 encoded = PyUnicode_AsUTF8String(obj);
1231 if (encoded == NULL)
1232 goto error;
1233
1234 size = PyBytes_GET_SIZE(encoded);
1235 if (size < 0 || size > 0xffffffffL)
1236 goto error; /* string too large */
1237
1238 pdata[0] = BINUNICODE;
1239 pdata[1] = (unsigned char)(size & 0xff);
1240 pdata[2] = (unsigned char)((size >> 8) & 0xff);
1241 pdata[3] = (unsigned char)((size >> 16) & 0xff);
1242 pdata[4] = (unsigned char)((size >> 24) & 0xff);
1243
1244 if (pickler_write(self, pdata, 5) < 0)
1245 goto error;
1246
1247 if (pickler_write(self, PyBytes_AS_STRING(encoded), size) < 0)
1248 goto error;
1249 }
1250 else {
1251 const char unicode_op = UNICODE;
1252
1253 encoded = raw_unicode_escape(PyUnicode_AS_UNICODE(obj),
1254 PyUnicode_GET_SIZE(obj));
1255 if (encoded == NULL)
1256 goto error;
1257
1258 if (pickler_write(self, &unicode_op, 1) < 0)
1259 goto error;
1260
1261 size = PyBytes_GET_SIZE(encoded);
1262 if (pickler_write(self, PyBytes_AS_STRING(encoded), size) < 0)
1263 goto error;
1264
1265 if (pickler_write(self, "\n", 1) < 0)
1266 goto error;
1267 }
1268 if (memo_put(self, obj) < 0)
1269 goto error;
1270
1271 Py_DECREF(encoded);
1272 return 0;
1273
1274 error:
1275 Py_XDECREF(encoded);
1276 return -1;
1277}
1278
1279/* A helper for save_tuple. Push the len elements in tuple t on the stack. */
1280static int
1281store_tuple_elements(PicklerObject *self, PyObject *t, int len)
1282{
1283 int i;
1284
1285 assert(PyTuple_Size(t) == len);
1286
1287 for (i = 0; i < len; i++) {
1288 PyObject *element = PyTuple_GET_ITEM(t, i);
1289
1290 if (element == NULL)
1291 return -1;
1292 if (save(self, element, 0) < 0)
1293 return -1;
1294 }
1295
1296 return 0;
1297}
1298
1299/* Tuples are ubiquitous in the pickle protocols, so many techniques are
1300 * used across protocols to minimize the space needed to pickle them.
1301 * Tuples are also the only builtin immutable type that can be recursive
1302 * (a tuple can be reached from itself), and that requires some subtle
1303 * magic so that it works in all cases. IOW, this is a long routine.
1304 */
1305static int
1306save_tuple(PicklerObject *self, PyObject *obj)
1307{
1308 PyObject *memo_key = NULL;
1309 int len, i;
1310 int status = 0;
1311
1312 const char mark_op = MARK;
1313 const char tuple_op = TUPLE;
1314 const char pop_op = POP;
1315 const char pop_mark_op = POP_MARK;
1316 const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
1317
1318 if ((len = PyTuple_Size(obj)) < 0)
1319 return -1;
1320
1321 if (len == 0) {
1322 char pdata[2];
1323
1324 if (self->proto) {
1325 pdata[0] = EMPTY_TUPLE;
1326 len = 1;
1327 }
1328 else {
1329 pdata[0] = MARK;
1330 pdata[1] = TUPLE;
1331 len = 2;
1332 }
1333 if (pickler_write(self, pdata, len) < 0)
1334 return -1;
1335 return 0;
1336 }
1337
1338 /* id(tuple) isn't in the memo now. If it shows up there after
1339 * saving the tuple elements, the tuple must be recursive, in
1340 * which case we'll pop everything we put on the stack, and fetch
1341 * its value from the memo.
1342 */
1343 memo_key = PyLong_FromVoidPtr(obj);
1344 if (memo_key == NULL)
1345 return -1;
1346
1347 if (len <= 3 && self->proto >= 2) {
1348 /* Use TUPLE{1,2,3} opcodes. */
1349 if (store_tuple_elements(self, obj, len) < 0)
1350 goto error;
1351
1352 if (PyDict_GetItem(self->memo, memo_key)) {
1353 /* pop the len elements */
1354 for (i = 0; i < len; i++)
1355 if (pickler_write(self, &pop_op, 1) < 0)
1356 goto error;
1357 /* fetch from memo */
1358 if (memo_get(self, memo_key) < 0)
1359 goto error;
1360
1361 Py_DECREF(memo_key);
1362 return 0;
1363 }
1364 else { /* Not recursive. */
1365 if (pickler_write(self, len2opcode + len, 1) < 0)
1366 goto error;
1367 }
1368 goto memoize;
1369 }
1370
1371 /* proto < 2 and len > 0, or proto >= 2 and len > 3.
1372 * Generate MARK e1 e2 ... TUPLE
1373 */
1374 if (pickler_write(self, &mark_op, 1) < 0)
1375 goto error;
1376
1377 if (store_tuple_elements(self, obj, len) < 0)
1378 goto error;
1379
1380 if (PyDict_GetItem(self->memo, memo_key)) {
1381 /* pop the stack stuff we pushed */
1382 if (self->bin) {
1383 if (pickler_write(self, &pop_mark_op, 1) < 0)
1384 goto error;
1385 }
1386 else {
1387 /* Note that we pop one more than len, to remove
1388 * the MARK too.
1389 */
1390 for (i = 0; i <= len; i++)
1391 if (pickler_write(self, &pop_op, 1) < 0)
1392 goto error;
1393 }
1394 /* fetch from memo */
1395 if (memo_get(self, memo_key) < 0)
1396 goto error;
1397
1398 Py_DECREF(memo_key);
1399 return 0;
1400 }
1401 else { /* Not recursive. */
1402 if (pickler_write(self, &tuple_op, 1) < 0)
1403 goto error;
1404 }
1405
1406 memoize:
1407 if (memo_put(self, obj) < 0)
1408 goto error;
1409
1410 if (0) {
1411 error:
1412 status = -1;
1413 }
1414
1415 Py_DECREF(memo_key);
1416 return status;
1417}
1418
1419/* iter is an iterator giving items, and we batch up chunks of
1420 * MARK item item ... item APPENDS
1421 * opcode sequences. Calling code should have arranged to first create an
1422 * empty list, or list-like object, for the APPENDS to operate on.
1423 * Returns 0 on success, <0 on error.
1424 */
1425static int
1426batch_list(PicklerObject *self, PyObject *iter)
1427{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001428 PyObject *obj = NULL;
1429 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001430 int i, n;
1431
1432 const char mark_op = MARK;
1433 const char append_op = APPEND;
1434 const char appends_op = APPENDS;
1435
1436 assert(iter != NULL);
1437
1438 /* XXX: I think this function could be made faster by avoiding the
1439 iterator interface and fetching objects directly from list using
1440 PyList_GET_ITEM.
1441 */
1442
1443 if (self->proto == 0) {
1444 /* APPENDS isn't available; do one at a time. */
1445 for (;;) {
1446 obj = PyIter_Next(iter);
1447 if (obj == NULL) {
1448 if (PyErr_Occurred())
1449 return -1;
1450 break;
1451 }
1452 i = save(self, obj, 0);
1453 Py_DECREF(obj);
1454 if (i < 0)
1455 return -1;
1456 if (pickler_write(self, &append_op, 1) < 0)
1457 return -1;
1458 }
1459 return 0;
1460 }
1461
1462 /* proto > 0: write in batches of BATCHSIZE. */
1463 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001464 /* Get first item */
1465 firstitem = PyIter_Next(iter);
1466 if (firstitem == NULL) {
1467 if (PyErr_Occurred())
1468 goto error;
1469
1470 /* nothing more to add */
1471 break;
1472 }
1473
1474 /* Try to get a second item */
1475 obj = PyIter_Next(iter);
1476 if (obj == NULL) {
1477 if (PyErr_Occurred())
1478 goto error;
1479
1480 /* Only one item to write */
1481 if (save(self, firstitem, 0) < 0)
1482 goto error;
1483 if (pickler_write(self, &append_op, 1) < 0)
1484 goto error;
1485 Py_CLEAR(firstitem);
1486 break;
1487 }
1488
1489 /* More than one item to write */
1490
1491 /* Pump out MARK, items, APPENDS. */
1492 if (pickler_write(self, &mark_op, 1) < 0)
1493 goto error;
1494
1495 if (save(self, firstitem, 0) < 0)
1496 goto error;
1497 Py_CLEAR(firstitem);
1498 n = 1;
1499
1500 /* Fetch and save up to BATCHSIZE items */
1501 while (obj) {
1502 if (save(self, obj, 0) < 0)
1503 goto error;
1504 Py_CLEAR(obj);
1505 n += 1;
1506
1507 if (n == BATCHSIZE)
1508 break;
1509
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001510 obj = PyIter_Next(iter);
1511 if (obj == NULL) {
1512 if (PyErr_Occurred())
1513 goto error;
1514 break;
1515 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001516 }
1517
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001518 if (pickler_write(self, &appends_op, 1) < 0)
1519 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001520
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001521 } while (n == BATCHSIZE);
1522 return 0;
1523
1524 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001525 Py_XDECREF(firstitem);
1526 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001527 return -1;
1528}
1529
1530static int
1531save_list(PicklerObject *self, PyObject *obj)
1532{
1533 PyObject *iter;
1534 char header[3];
1535 int len;
1536 int status = 0;
1537
1538 if (self->fast && !fast_save_enter(self, obj))
1539 goto error;
1540
1541 /* Create an empty list. */
1542 if (self->bin) {
1543 header[0] = EMPTY_LIST;
1544 len = 1;
1545 }
1546 else {
1547 header[0] = MARK;
1548 header[1] = LIST;
1549 len = 2;
1550 }
1551
1552 if (pickler_write(self, header, len) < 0)
1553 goto error;
1554
1555 /* Get list length, and bow out early if empty. */
1556 if ((len = PyList_Size(obj)) < 0)
1557 goto error;
1558
1559 if (memo_put(self, obj) < 0)
1560 goto error;
1561
1562 if (len != 0) {
1563 /* Save the list elements. */
1564 iter = PyObject_GetIter(obj);
1565 if (iter == NULL)
1566 goto error;
1567 status = batch_list(self, iter);
1568 Py_DECREF(iter);
1569 }
1570
1571 if (0) {
1572 error:
1573 status = -1;
1574 }
1575
1576 if (self->fast && !fast_save_leave(self, obj))
1577 status = -1;
1578
1579 return status;
1580}
1581
1582/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
1583 * MARK key value ... key value SETITEMS
1584 * opcode sequences. Calling code should have arranged to first create an
1585 * empty dict, or dict-like object, for the SETITEMS to operate on.
1586 * Returns 0 on success, <0 on error.
1587 *
1588 * This is very much like batch_list(). The difference between saving
1589 * elements directly, and picking apart two-tuples, is so long-winded at
1590 * the C level, though, that attempts to combine these routines were too
1591 * ugly to bear.
1592 */
1593static int
1594batch_dict(PicklerObject *self, PyObject *iter)
1595{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001596 PyObject *obj = NULL;
1597 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001598 int i, n;
1599
1600 const char mark_op = MARK;
1601 const char setitem_op = SETITEM;
1602 const char setitems_op = SETITEMS;
1603
1604 assert(iter != NULL);
1605
1606 if (self->proto == 0) {
1607 /* SETITEMS isn't available; do one at a time. */
1608 for (;;) {
1609 obj = PyIter_Next(iter);
1610 if (obj == NULL) {
1611 if (PyErr_Occurred())
1612 return -1;
1613 break;
1614 }
1615 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
1616 PyErr_SetString(PyExc_TypeError, "dict items "
1617 "iterator must return 2-tuples");
1618 return -1;
1619 }
1620 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
1621 if (i >= 0)
1622 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
1623 Py_DECREF(obj);
1624 if (i < 0)
1625 return -1;
1626 if (pickler_write(self, &setitem_op, 1) < 0)
1627 return -1;
1628 }
1629 return 0;
1630 }
1631
1632 /* proto > 0: write in batches of BATCHSIZE. */
1633 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001634 /* Get first item */
1635 firstitem = PyIter_Next(iter);
1636 if (firstitem == NULL) {
1637 if (PyErr_Occurred())
1638 goto error;
1639
1640 /* nothing more to add */
1641 break;
1642 }
1643 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
1644 PyErr_SetString(PyExc_TypeError, "dict items "
1645 "iterator must return 2-tuples");
1646 goto error;
1647 }
1648
1649 /* Try to get a second item */
1650 obj = PyIter_Next(iter);
1651 if (obj == NULL) {
1652 if (PyErr_Occurred())
1653 goto error;
1654
1655 /* Only one item to write */
1656 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
1657 goto error;
1658 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
1659 goto error;
1660 if (pickler_write(self, &setitem_op, 1) < 0)
1661 goto error;
1662 Py_CLEAR(firstitem);
1663 break;
1664 }
1665
1666 /* More than one item to write */
1667
1668 /* Pump out MARK, items, SETITEMS. */
1669 if (pickler_write(self, &mark_op, 1) < 0)
1670 goto error;
1671
1672 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
1673 goto error;
1674 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
1675 goto error;
1676 Py_CLEAR(firstitem);
1677 n = 1;
1678
1679 /* Fetch and save up to BATCHSIZE items */
1680 while (obj) {
1681 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
1682 PyErr_SetString(PyExc_TypeError, "dict items "
1683 "iterator must return 2-tuples");
1684 goto error;
1685 }
1686 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
1687 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
1688 goto error;
1689 Py_CLEAR(obj);
1690 n += 1;
1691
1692 if (n == BATCHSIZE)
1693 break;
1694
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001695 obj = PyIter_Next(iter);
1696 if (obj == NULL) {
1697 if (PyErr_Occurred())
1698 goto error;
1699 break;
1700 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001701 }
1702
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001703 if (pickler_write(self, &setitems_op, 1) < 0)
1704 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001705
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001706 } while (n == BATCHSIZE);
1707 return 0;
1708
1709 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001710 Py_XDECREF(firstitem);
1711 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001712 return -1;
1713}
1714
Collin Winter5c9b02d2009-05-25 05:43:30 +00001715/* This is a variant of batch_dict() above that specializes for dicts, with no
1716 * support for dict subclasses. Like batch_dict(), we batch up chunks of
1717 * MARK key value ... key value SETITEMS
1718 * opcode sequences. Calling code should have arranged to first create an
1719 * empty dict, or dict-like object, for the SETITEMS to operate on.
1720 * Returns 0 on success, -1 on error.
1721 *
1722 * Note that this currently doesn't work for protocol 0.
1723 */
1724static int
1725batch_dict_exact(PicklerObject *self, PyObject *obj)
1726{
1727 PyObject *key = NULL, *value = NULL;
1728 int i;
1729 Py_ssize_t dict_size, ppos = 0;
1730
Alexandre Vassalottif70b1292009-05-25 18:00:52 +00001731 const char mark_op = MARK;
1732 const char setitem_op = SETITEM;
1733 const char setitems_op = SETITEMS;
Collin Winter5c9b02d2009-05-25 05:43:30 +00001734
1735 assert(obj != NULL);
1736 assert(self->proto > 0);
1737
1738 dict_size = PyDict_Size(obj);
1739
1740 /* Special-case len(d) == 1 to save space. */
1741 if (dict_size == 1) {
1742 PyDict_Next(obj, &ppos, &key, &value);
1743 if (save(self, key, 0) < 0)
1744 return -1;
1745 if (save(self, value, 0) < 0)
1746 return -1;
Alexandre Vassalottif70b1292009-05-25 18:00:52 +00001747 if (pickler_write(self, &setitem_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00001748 return -1;
1749 return 0;
1750 }
1751
1752 /* Write in batches of BATCHSIZE. */
1753 do {
1754 i = 0;
1755 if (pickler_write(self, &mark_op, 1) < 0)
1756 return -1;
1757 while (PyDict_Next(obj, &ppos, &key, &value)) {
1758 if (save(self, key, 0) < 0)
1759 return -1;
1760 if (save(self, value, 0) < 0)
1761 return -1;
1762 if (++i == BATCHSIZE)
1763 break;
1764 }
Alexandre Vassalottif70b1292009-05-25 18:00:52 +00001765 if (pickler_write(self, &setitems_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00001766 return -1;
1767 if (PyDict_Size(obj) != dict_size) {
1768 PyErr_Format(
1769 PyExc_RuntimeError,
1770 "dictionary changed size during iteration");
1771 return -1;
1772 }
1773
1774 } while (i == BATCHSIZE);
1775 return 0;
1776}
1777
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001778static int
1779save_dict(PicklerObject *self, PyObject *obj)
1780{
1781 PyObject *items, *iter;
1782 char header[3];
1783 int len;
1784 int status = 0;
1785
1786 if (self->fast && !fast_save_enter(self, obj))
1787 goto error;
1788
1789 /* Create an empty dict. */
1790 if (self->bin) {
1791 header[0] = EMPTY_DICT;
1792 len = 1;
1793 }
1794 else {
1795 header[0] = MARK;
1796 header[1] = DICT;
1797 len = 2;
1798 }
1799
1800 if (pickler_write(self, header, len) < 0)
1801 goto error;
1802
1803 /* Get dict size, and bow out early if empty. */
1804 if ((len = PyDict_Size(obj)) < 0)
1805 goto error;
1806
1807 if (memo_put(self, obj) < 0)
1808 goto error;
1809
1810 if (len != 0) {
1811 /* Save the dict items. */
Collin Winter5c9b02d2009-05-25 05:43:30 +00001812 if (PyDict_CheckExact(obj) && self->proto > 0) {
1813 /* We can take certain shortcuts if we know this is a dict and
1814 not a dict subclass. */
1815 if (Py_EnterRecursiveCall(" while pickling an object") == 0) {
1816 status = batch_dict_exact(self, obj);
1817 Py_LeaveRecursiveCall();
1818 }
1819 } else {
1820 items = PyObject_CallMethod(obj, "items", "()");
1821 if (items == NULL)
1822 goto error;
1823 iter = PyObject_GetIter(items);
1824 Py_DECREF(items);
1825 if (iter == NULL)
1826 goto error;
1827 status = batch_dict(self, iter);
1828 Py_DECREF(iter);
1829 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001830 }
1831
1832 if (0) {
1833 error:
1834 status = -1;
1835 }
1836
1837 if (self->fast && !fast_save_leave(self, obj))
1838 status = -1;
1839
1840 return status;
1841}
1842
1843static int
1844save_global(PicklerObject *self, PyObject *obj, PyObject *name)
1845{
1846 static PyObject *name_str = NULL;
1847 PyObject *global_name = NULL;
1848 PyObject *module_name = NULL;
1849 PyObject *module = NULL;
1850 PyObject *cls;
1851 int status = 0;
1852
1853 const char global_op = GLOBAL;
1854
1855 if (name_str == NULL) {
1856 name_str = PyUnicode_InternFromString("__name__");
1857 if (name_str == NULL)
1858 goto error;
1859 }
1860
1861 if (name) {
1862 global_name = name;
1863 Py_INCREF(global_name);
1864 }
1865 else {
1866 global_name = PyObject_GetAttr(obj, name_str);
1867 if (global_name == NULL)
1868 goto error;
1869 }
1870
1871 module_name = whichmodule(obj, global_name);
1872 if (module_name == NULL)
1873 goto error;
1874
1875 /* XXX: Change to use the import C API directly with level=0 to disallow
1876 relative imports.
1877
1878 XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
1879 builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
1880 custom import functions (IMHO, this would be a nice security
1881 feature). The import C API would need to be extended to support the
1882 extra parameters of __import__ to fix that. */
1883 module = PyImport_Import(module_name);
1884 if (module == NULL) {
1885 PyErr_Format(PicklingError,
1886 "Can't pickle %R: import of module %R failed",
1887 obj, module_name);
1888 goto error;
1889 }
1890 cls = PyObject_GetAttr(module, global_name);
1891 if (cls == NULL) {
1892 PyErr_Format(PicklingError,
1893 "Can't pickle %R: attribute lookup %S.%S failed",
1894 obj, module_name, global_name);
1895 goto error;
1896 }
1897 if (cls != obj) {
1898 Py_DECREF(cls);
1899 PyErr_Format(PicklingError,
1900 "Can't pickle %R: it's not the same object as %S.%S",
1901 obj, module_name, global_name);
1902 goto error;
1903 }
1904 Py_DECREF(cls);
1905
1906 if (self->proto >= 2) {
1907 /* See whether this is in the extension registry, and if
1908 * so generate an EXT opcode.
1909 */
1910 PyObject *code_obj; /* extension code as Python object */
1911 long code; /* extension code as C value */
1912 char pdata[5];
1913 int n;
1914
1915 PyTuple_SET_ITEM(two_tuple, 0, module_name);
1916 PyTuple_SET_ITEM(two_tuple, 1, global_name);
1917 code_obj = PyDict_GetItem(extension_registry, two_tuple);
1918 /* The object is not registered in the extension registry.
1919 This is the most likely code path. */
1920 if (code_obj == NULL)
1921 goto gen_global;
1922
1923 /* XXX: pickle.py doesn't check neither the type, nor the range
1924 of the value returned by the extension_registry. It should for
1925 consistency. */
1926
1927 /* Verify code_obj has the right type and value. */
1928 if (!PyLong_Check(code_obj)) {
1929 PyErr_Format(PicklingError,
1930 "Can't pickle %R: extension code %R isn't an integer",
1931 obj, code_obj);
1932 goto error;
1933 }
1934 code = PyLong_AS_LONG(code_obj);
1935 if (code <= 0 || code > 0x7fffffffL) {
1936 PyErr_Format(PicklingError,
1937 "Can't pickle %R: extension code %ld is out of range",
1938 obj, code);
1939 goto error;
1940 }
1941
1942 /* Generate an EXT opcode. */
1943 if (code <= 0xff) {
1944 pdata[0] = EXT1;
1945 pdata[1] = (unsigned char)code;
1946 n = 2;
1947 }
1948 else if (code <= 0xffff) {
1949 pdata[0] = EXT2;
1950 pdata[1] = (unsigned char)(code & 0xff);
1951 pdata[2] = (unsigned char)((code >> 8) & 0xff);
1952 n = 3;
1953 }
1954 else {
1955 pdata[0] = EXT4;
1956 pdata[1] = (unsigned char)(code & 0xff);
1957 pdata[2] = (unsigned char)((code >> 8) & 0xff);
1958 pdata[3] = (unsigned char)((code >> 16) & 0xff);
1959 pdata[4] = (unsigned char)((code >> 24) & 0xff);
1960 n = 5;
1961 }
1962
1963 if (pickler_write(self, pdata, n) < 0)
1964 goto error;
1965 }
1966 else {
1967 /* Generate a normal global opcode if we are using a pickle
1968 protocol <= 2, or if the object is not registered in the
1969 extension registry. */
1970 PyObject *encoded;
1971 PyObject *(*unicode_encoder)(PyObject *);
1972
1973 gen_global:
1974 if (pickler_write(self, &global_op, 1) < 0)
1975 goto error;
1976
1977 /* Since Python 3.0 now supports non-ASCII identifiers, we encode both
1978 the module name and the global name using UTF-8. We do so only when
1979 we are using the pickle protocol newer than version 3. This is to
1980 ensure compatibility with older Unpickler running on Python 2.x. */
1981 if (self->proto >= 3) {
1982 unicode_encoder = PyUnicode_AsUTF8String;
1983 }
1984 else {
1985 unicode_encoder = PyUnicode_AsASCIIString;
1986 }
1987
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00001988 /* For protocol < 3 and if the user didn't request against doing so,
1989 we convert module names to the old 2.x module names. */
1990 if (self->fix_imports) {
1991 PyObject *key;
1992 PyObject *item;
1993
1994 key = PyTuple_Pack(2, module_name, global_name);
1995 if (key == NULL)
1996 goto error;
1997 item = PyDict_GetItemWithError(name_mapping_3to2, key);
1998 Py_DECREF(key);
1999 if (item) {
2000 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
2001 PyErr_Format(PyExc_RuntimeError,
2002 "_compat_pickle.REVERSE_NAME_MAPPING values "
2003 "should be 2-tuples, not %.200s",
2004 Py_TYPE(item)->tp_name);
2005 goto error;
2006 }
2007 Py_CLEAR(module_name);
2008 Py_CLEAR(global_name);
2009 module_name = PyTuple_GET_ITEM(item, 0);
2010 global_name = PyTuple_GET_ITEM(item, 1);
2011 if (!PyUnicode_Check(module_name) ||
2012 !PyUnicode_Check(global_name)) {
2013 PyErr_Format(PyExc_RuntimeError,
2014 "_compat_pickle.REVERSE_NAME_MAPPING values "
2015 "should be pairs of str, not (%.200s, %.200s)",
2016 Py_TYPE(module_name)->tp_name,
2017 Py_TYPE(global_name)->tp_name);
2018 goto error;
2019 }
2020 Py_INCREF(module_name);
2021 Py_INCREF(global_name);
2022 }
2023 else if (PyErr_Occurred()) {
2024 goto error;
2025 }
2026
2027 item = PyDict_GetItemWithError(import_mapping_3to2, module_name);
2028 if (item) {
2029 if (!PyUnicode_Check(item)) {
2030 PyErr_Format(PyExc_RuntimeError,
2031 "_compat_pickle.REVERSE_IMPORT_MAPPING values "
2032 "should be strings, not %.200s",
2033 Py_TYPE(item)->tp_name);
2034 goto error;
2035 }
2036 Py_CLEAR(module_name);
2037 module_name = item;
2038 Py_INCREF(module_name);
2039 }
2040 else if (PyErr_Occurred()) {
2041 goto error;
2042 }
2043 }
2044
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002045 /* Save the name of the module. */
2046 encoded = unicode_encoder(module_name);
2047 if (encoded == NULL) {
2048 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
2049 PyErr_Format(PicklingError,
2050 "can't pickle module identifier '%S' using "
2051 "pickle protocol %i", module_name, self->proto);
2052 goto error;
2053 }
2054 if (pickler_write(self, PyBytes_AS_STRING(encoded),
2055 PyBytes_GET_SIZE(encoded)) < 0) {
2056 Py_DECREF(encoded);
2057 goto error;
2058 }
2059 Py_DECREF(encoded);
2060 if(pickler_write(self, "\n", 1) < 0)
2061 goto error;
2062
2063 /* Save the name of the module. */
2064 encoded = unicode_encoder(global_name);
2065 if (encoded == NULL) {
2066 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
2067 PyErr_Format(PicklingError,
2068 "can't pickle global identifier '%S' using "
2069 "pickle protocol %i", global_name, self->proto);
2070 goto error;
2071 }
2072 if (pickler_write(self, PyBytes_AS_STRING(encoded),
2073 PyBytes_GET_SIZE(encoded)) < 0) {
2074 Py_DECREF(encoded);
2075 goto error;
2076 }
2077 Py_DECREF(encoded);
2078 if(pickler_write(self, "\n", 1) < 0)
2079 goto error;
2080
2081 /* Memoize the object. */
2082 if (memo_put(self, obj) < 0)
2083 goto error;
2084 }
2085
2086 if (0) {
2087 error:
2088 status = -1;
2089 }
2090 Py_XDECREF(module_name);
2091 Py_XDECREF(global_name);
2092 Py_XDECREF(module);
2093
2094 return status;
2095}
2096
2097static int
2098save_pers(PicklerObject *self, PyObject *obj, PyObject *func)
2099{
2100 PyObject *pid = NULL;
2101 int status = 0;
2102
2103 const char persid_op = PERSID;
2104 const char binpersid_op = BINPERSID;
2105
2106 Py_INCREF(obj);
2107 pid = pickler_call(self, func, obj);
2108 if (pid == NULL)
2109 return -1;
2110
2111 if (pid != Py_None) {
2112 if (self->bin) {
2113 if (save(self, pid, 1) < 0 ||
2114 pickler_write(self, &binpersid_op, 1) < 0)
2115 goto error;
2116 }
2117 else {
2118 PyObject *pid_str = NULL;
2119 char *pid_ascii_bytes;
2120 Py_ssize_t size;
2121
2122 pid_str = PyObject_Str(pid);
2123 if (pid_str == NULL)
2124 goto error;
2125
2126 /* XXX: Should it check whether the persistent id only contains
2127 ASCII characters? And what if the pid contains embedded
2128 newlines? */
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00002129 pid_ascii_bytes = _PyUnicode_AsStringAndSize(pid_str, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002130 Py_DECREF(pid_str);
2131 if (pid_ascii_bytes == NULL)
2132 goto error;
2133
2134 if (pickler_write(self, &persid_op, 1) < 0 ||
2135 pickler_write(self, pid_ascii_bytes, size) < 0 ||
2136 pickler_write(self, "\n", 1) < 0)
2137 goto error;
2138 }
2139 status = 1;
2140 }
2141
2142 if (0) {
2143 error:
2144 status = -1;
2145 }
2146 Py_XDECREF(pid);
2147
2148 return status;
2149}
2150
2151/* We're saving obj, and args is the 2-thru-5 tuple returned by the
2152 * appropriate __reduce__ method for obj.
2153 */
2154static int
2155save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
2156{
2157 PyObject *callable;
2158 PyObject *argtup;
2159 PyObject *state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002160 PyObject *listitems = Py_None;
2161 PyObject *dictitems = Py_None;
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002162 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002163
2164 int use_newobj = self->proto >= 2;
2165
2166 const char reduce_op = REDUCE;
2167 const char build_op = BUILD;
2168 const char newobj_op = NEWOBJ;
2169
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002170 size = PyTuple_Size(args);
2171 if (size < 2 || size > 5) {
2172 PyErr_SetString(PicklingError, "tuple returned by "
2173 "__reduce__ must contain 2 through 5 elements");
2174 return -1;
2175 }
2176
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002177 if (!PyArg_UnpackTuple(args, "save_reduce", 2, 5,
2178 &callable, &argtup, &state, &listitems, &dictitems))
2179 return -1;
2180
2181 if (!PyCallable_Check(callable)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002182 PyErr_SetString(PicklingError, "first item of the tuple "
2183 "returned by __reduce__ must be callable");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002184 return -1;
2185 }
2186 if (!PyTuple_Check(argtup)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002187 PyErr_SetString(PicklingError, "second item of the tuple "
2188 "returned by __reduce__ must be a tuple");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002189 return -1;
2190 }
2191
2192 if (state == Py_None)
2193 state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002194
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002195 if (listitems == Py_None)
2196 listitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002197 else if (!PyIter_Check(listitems)) {
2198 PyErr_Format(PicklingError, "Fourth element of tuple"
2199 "returned by __reduce__ must be an iterator, not %s",
2200 Py_TYPE(listitems)->tp_name);
2201 return -1;
2202 }
2203
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002204 if (dictitems == Py_None)
2205 dictitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002206 else if (!PyIter_Check(dictitems)) {
2207 PyErr_Format(PicklingError, "Fifth element of tuple"
2208 "returned by __reduce__ must be an iterator, not %s",
2209 Py_TYPE(dictitems)->tp_name);
2210 return -1;
2211 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002212
2213 /* Protocol 2 special case: if callable's name is __newobj__, use
2214 NEWOBJ. */
2215 if (use_newobj) {
2216 static PyObject *newobj_str = NULL;
2217 PyObject *name_str;
2218
2219 if (newobj_str == NULL) {
2220 newobj_str = PyUnicode_InternFromString("__newobj__");
2221 }
2222
2223 name_str = PyObject_GetAttrString(callable, "__name__");
2224 if (name_str == NULL) {
2225 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2226 PyErr_Clear();
2227 else
2228 return -1;
2229 use_newobj = 0;
2230 }
2231 else {
2232 use_newobj = PyUnicode_Check(name_str) &&
2233 PyUnicode_Compare(name_str, newobj_str) == 0;
2234 Py_DECREF(name_str);
2235 }
2236 }
2237 if (use_newobj) {
2238 PyObject *cls;
2239 PyObject *newargtup;
2240 PyObject *obj_class;
2241 int p;
2242
2243 /* Sanity checks. */
2244 if (Py_SIZE(argtup) < 1) {
2245 PyErr_SetString(PicklingError, "__newobj__ arglist is empty");
2246 return -1;
2247 }
2248
2249 cls = PyTuple_GET_ITEM(argtup, 0);
2250 if (!PyObject_HasAttrString(cls, "__new__")) {
2251 PyErr_SetString(PicklingError, "args[0] from "
2252 "__newobj__ args has no __new__");
2253 return -1;
2254 }
2255
2256 if (obj != NULL) {
2257 obj_class = PyObject_GetAttrString(obj, "__class__");
2258 if (obj_class == NULL) {
2259 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2260 PyErr_Clear();
2261 else
2262 return -1;
2263 }
2264 p = obj_class != cls; /* true iff a problem */
2265 Py_DECREF(obj_class);
2266 if (p) {
2267 PyErr_SetString(PicklingError, "args[0] from "
2268 "__newobj__ args has the wrong class");
2269 return -1;
2270 }
2271 }
2272 /* XXX: These calls save() are prone to infinite recursion. Imagine
2273 what happen if the value returned by the __reduce__() method of
2274 some extension type contains another object of the same type. Ouch!
2275
2276 Here is a quick example, that I ran into, to illustrate what I
2277 mean:
2278
2279 >>> import pickle, copyreg
2280 >>> copyreg.dispatch_table.pop(complex)
2281 >>> pickle.dumps(1+2j)
2282 Traceback (most recent call last):
2283 ...
2284 RuntimeError: maximum recursion depth exceeded
2285
2286 Removing the complex class from copyreg.dispatch_table made the
2287 __reduce_ex__() method emit another complex object:
2288
2289 >>> (1+1j).__reduce_ex__(2)
2290 (<function __newobj__ at 0xb7b71c3c>,
2291 (<class 'complex'>, (1+1j)), None, None, None)
2292
2293 Thus when save() was called on newargstup (the 2nd item) recursion
2294 ensued. Of course, the bug was in the complex class which had a
2295 broken __getnewargs__() that emitted another complex object. But,
2296 the point, here, is it is quite easy to end up with a broken reduce
2297 function. */
2298
2299 /* Save the class and its __new__ arguments. */
2300 if (save(self, cls, 0) < 0)
2301 return -1;
2302
2303 newargtup = PyTuple_GetSlice(argtup, 1, Py_SIZE(argtup));
2304 if (newargtup == NULL)
2305 return -1;
2306
2307 p = save(self, newargtup, 0);
2308 Py_DECREF(newargtup);
2309 if (p < 0)
2310 return -1;
2311
2312 /* Add NEWOBJ opcode. */
2313 if (pickler_write(self, &newobj_op, 1) < 0)
2314 return -1;
2315 }
2316 else { /* Not using NEWOBJ. */
2317 if (save(self, callable, 0) < 0 ||
2318 save(self, argtup, 0) < 0 ||
2319 pickler_write(self, &reduce_op, 1) < 0)
2320 return -1;
2321 }
2322
2323 /* obj can be NULL when save_reduce() is used directly. A NULL obj means
2324 the caller do not want to memoize the object. Not particularly useful,
2325 but that is to mimic the behavior save_reduce() in pickle.py when
2326 obj is None. */
2327 if (obj && memo_put(self, obj) < 0)
2328 return -1;
2329
2330 if (listitems && batch_list(self, listitems) < 0)
2331 return -1;
2332
2333 if (dictitems && batch_dict(self, dictitems) < 0)
2334 return -1;
2335
2336 if (state) {
2337 if (save(self, state, 0) < 0 ||
2338 pickler_write(self, &build_op, 1) < 0)
2339 return -1;
2340 }
2341
2342 return 0;
2343}
2344
2345static int
2346save(PicklerObject *self, PyObject *obj, int pers_save)
2347{
2348 PyTypeObject *type;
2349 PyObject *reduce_func = NULL;
2350 PyObject *reduce_value = NULL;
2351 PyObject *memo_key = NULL;
2352 int status = 0;
2353
Alexandre Vassalottidff18342008-07-13 18:48:30 +00002354 if (Py_EnterRecursiveCall(" while pickling an object") < 0)
2355 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002356
2357 /* The extra pers_save argument is necessary to avoid calling save_pers()
2358 on its returned object. */
2359 if (!pers_save && self->pers_func) {
2360 /* save_pers() returns:
2361 -1 to signal an error;
2362 0 if it did nothing successfully;
2363 1 if a persistent id was saved.
2364 */
2365 if ((status = save_pers(self, obj, self->pers_func)) != 0)
2366 goto done;
2367 }
2368
2369 type = Py_TYPE(obj);
2370
2371 /* XXX: The old cPickle had an optimization that used switch-case
2372 statement dispatching on the first letter of the type name. It was
2373 probably not a bad idea after all. If benchmarks shows that particular
2374 optimization had some real benefits, it would be nice to add it
2375 back. */
2376
2377 /* Atom types; these aren't memoized, so don't check the memo. */
2378
2379 if (obj == Py_None) {
2380 status = save_none(self, obj);
2381 goto done;
2382 }
2383 else if (obj == Py_False || obj == Py_True) {
2384 status = save_bool(self, obj);
2385 goto done;
2386 }
2387 else if (type == &PyLong_Type) {
2388 status = save_long(self, obj);
2389 goto done;
2390 }
2391 else if (type == &PyFloat_Type) {
2392 status = save_float(self, obj);
2393 goto done;
2394 }
2395
2396 /* Check the memo to see if it has the object. If so, generate
2397 a GET (or BINGET) opcode, instead of pickling the object
2398 once again. */
2399 memo_key = PyLong_FromVoidPtr(obj);
2400 if (memo_key == NULL)
2401 goto error;
2402 if (PyDict_GetItem(self->memo, memo_key)) {
2403 if (memo_get(self, memo_key) < 0)
2404 goto error;
2405 goto done;
2406 }
2407
2408 if (type == &PyBytes_Type) {
2409 status = save_bytes(self, obj);
2410 goto done;
2411 }
2412 else if (type == &PyUnicode_Type) {
2413 status = save_unicode(self, obj);
2414 goto done;
2415 }
2416 else if (type == &PyDict_Type) {
2417 status = save_dict(self, obj);
2418 goto done;
2419 }
2420 else if (type == &PyList_Type) {
2421 status = save_list(self, obj);
2422 goto done;
2423 }
2424 else if (type == &PyTuple_Type) {
2425 status = save_tuple(self, obj);
2426 goto done;
2427 }
2428 else if (type == &PyType_Type) {
2429 status = save_global(self, obj, NULL);
2430 goto done;
2431 }
2432 else if (type == &PyFunction_Type) {
2433 status = save_global(self, obj, NULL);
2434 if (status < 0 && PyErr_ExceptionMatches(PickleError)) {
2435 /* fall back to reduce */
2436 PyErr_Clear();
2437 }
2438 else {
2439 goto done;
2440 }
2441 }
2442 else if (type == &PyCFunction_Type) {
2443 status = save_global(self, obj, NULL);
2444 goto done;
2445 }
2446 else if (PyType_IsSubtype(type, &PyType_Type)) {
2447 status = save_global(self, obj, NULL);
2448 goto done;
2449 }
2450
2451 /* XXX: This part needs some unit tests. */
2452
2453 /* Get a reduction callable, and call it. This may come from
2454 * copyreg.dispatch_table, the object's __reduce_ex__ method,
2455 * or the object's __reduce__ method.
2456 */
2457 reduce_func = PyDict_GetItem(dispatch_table, (PyObject *)type);
2458 if (reduce_func != NULL) {
2459 /* Here, the reference count of the reduce_func object returned by
2460 PyDict_GetItem needs to be increased to be consistent with the one
2461 returned by PyObject_GetAttr. This is allow us to blindly DECREF
2462 reduce_func at the end of the save() routine.
2463 */
2464 Py_INCREF(reduce_func);
2465 Py_INCREF(obj);
2466 reduce_value = pickler_call(self, reduce_func, obj);
2467 }
2468 else {
2469 static PyObject *reduce_str = NULL;
2470 static PyObject *reduce_ex_str = NULL;
2471
2472 /* Cache the name of the reduce methods. */
2473 if (reduce_str == NULL) {
2474 reduce_str = PyUnicode_InternFromString("__reduce__");
2475 if (reduce_str == NULL)
2476 goto error;
2477 reduce_ex_str = PyUnicode_InternFromString("__reduce_ex__");
2478 if (reduce_ex_str == NULL)
2479 goto error;
2480 }
2481
2482 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
2483 automatically defined as __reduce__. While this is convenient, this
2484 make it impossible to know which method was actually called. Of
2485 course, this is not a big deal. But still, it would be nice to let
2486 the user know which method was called when something go
2487 wrong. Incidentally, this means if __reduce_ex__ is not defined, we
2488 don't actually have to check for a __reduce__ method. */
2489
2490 /* Check for a __reduce_ex__ method. */
2491 reduce_func = PyObject_GetAttr(obj, reduce_ex_str);
2492 if (reduce_func != NULL) {
2493 PyObject *proto;
2494 proto = PyLong_FromLong(self->proto);
2495 if (proto != NULL) {
2496 reduce_value = pickler_call(self, reduce_func, proto);
2497 }
2498 }
2499 else {
2500 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2501 PyErr_Clear();
2502 else
2503 goto error;
2504 /* Check for a __reduce__ method. */
2505 reduce_func = PyObject_GetAttr(obj, reduce_str);
2506 if (reduce_func != NULL) {
2507 reduce_value = PyObject_Call(reduce_func, empty_tuple, NULL);
2508 }
2509 else {
2510 PyErr_Format(PicklingError, "can't pickle '%.200s' object: %R",
2511 type->tp_name, obj);
2512 goto error;
2513 }
2514 }
2515 }
2516
2517 if (reduce_value == NULL)
2518 goto error;
2519
2520 if (PyUnicode_Check(reduce_value)) {
2521 status = save_global(self, obj, reduce_value);
2522 goto done;
2523 }
2524
2525 if (!PyTuple_Check(reduce_value)) {
2526 PyErr_SetString(PicklingError,
2527 "__reduce__ must return a string or tuple");
2528 goto error;
2529 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002530
2531 status = save_reduce(self, reduce_value, obj);
2532
2533 if (0) {
2534 error:
2535 status = -1;
2536 }
2537 done:
Alexandre Vassalottidff18342008-07-13 18:48:30 +00002538 Py_LeaveRecursiveCall();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002539 Py_XDECREF(memo_key);
2540 Py_XDECREF(reduce_func);
2541 Py_XDECREF(reduce_value);
2542
2543 return status;
2544}
2545
2546static int
2547dump(PicklerObject *self, PyObject *obj)
2548{
2549 const char stop_op = STOP;
2550
2551 if (self->proto >= 2) {
2552 char header[2];
2553
2554 header[0] = PROTO;
2555 assert(self->proto >= 0 && self->proto < 256);
2556 header[1] = (unsigned char)self->proto;
2557 if (pickler_write(self, header, 2) < 0)
2558 return -1;
2559 }
2560
2561 if (save(self, obj, 0) < 0 ||
2562 pickler_write(self, &stop_op, 1) < 0 ||
2563 pickler_write(self, NULL, 0) < 0)
2564 return -1;
2565
2566 return 0;
2567}
2568
2569PyDoc_STRVAR(Pickler_clear_memo_doc,
2570"clear_memo() -> None. Clears the pickler's \"memo\"."
2571"\n"
2572"The memo is the data structure that remembers which objects the\n"
2573"pickler has already seen, so that shared or recursive objects are\n"
2574"pickled by reference and not by value. This method is useful when\n"
2575"re-using picklers.");
2576
2577static PyObject *
2578Pickler_clear_memo(PicklerObject *self)
2579{
2580 if (self->memo)
2581 PyDict_Clear(self->memo);
2582
2583 Py_RETURN_NONE;
2584}
2585
2586PyDoc_STRVAR(Pickler_dump_doc,
2587"dump(obj) -> None. Write a pickled representation of obj to the open file.");
2588
2589static PyObject *
2590Pickler_dump(PicklerObject *self, PyObject *args)
2591{
2592 PyObject *obj;
2593
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00002594 /* Check whether the Pickler was initialized correctly (issue3664).
2595 Developers often forget to call __init__() in their subclasses, which
2596 would trigger a segfault without this check. */
2597 if (self->write == NULL) {
2598 PyErr_Format(PicklingError,
2599 "Pickler.__init__() was not called by %s.__init__()",
2600 Py_TYPE(self)->tp_name);
2601 return NULL;
2602 }
2603
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002604 if (!PyArg_ParseTuple(args, "O:dump", &obj))
2605 return NULL;
2606
2607 if (dump(self, obj) < 0)
2608 return NULL;
2609
2610 Py_RETURN_NONE;
2611}
2612
/* Method table for Pickler objects: dump() takes positional arguments,
   clear_memo() takes none.  Each entry carries its docstring. */
static struct PyMethodDef Pickler_methods[] = {
    {"dump", (PyCFunction)Pickler_dump, METH_VARARGS,
     Pickler_dump_doc},
    {"clear_memo", (PyCFunction)Pickler_clear_memo, METH_NOARGS,
     Pickler_clear_memo_doc},
    {NULL, NULL}                /* sentinel */
};
2620
/* Deallocator for Pickler objects: untrack from the GC, drop every owned
   reference, release the write buffer and free the object itself. */
static void
Pickler_dealloc(PicklerObject *self)
{
    /* Untrack first so the collector does not visit the object while its
       fields are being released. */
    PyObject_GC_UnTrack(self);

    Py_XDECREF(self->write);
    Py_XDECREF(self->memo);
    Py_XDECREF(self->pers_func);
    Py_XDECREF(self->arg);
    Py_XDECREF(self->fast_memo);

    /* write_buf is allocated with PyMem_Malloc() in Pickler_init(). */
    PyMem_Free(self->write_buf);

    Py_TYPE(self)->tp_free((PyObject *)self);
}
2636
/* GC traversal for Pickler objects: report every PyObject reference the
   pickler holds to the visit callback.  Returns 0 once all fields have
   been visited. */
static int
Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
{
    Py_VISIT(self->write);
    Py_VISIT(self->memo);
    Py_VISIT(self->pers_func);
    Py_VISIT(self->arg);
    Py_VISIT(self->fast_memo);
    return 0;
}
2647
/* Drop every reference held by the pickler and release its write buffer.
   Used as the type's tp_clear slot and by Pickler_init() to reset state
   when __init__() is called again.  Always returns 0. */
static int
Pickler_clear(PicklerObject *self)
{
    Py_CLEAR(self->write);
    Py_CLEAR(self->memo);
    Py_CLEAR(self->pers_func);
    Py_CLEAR(self->arg);
    Py_CLEAR(self->fast_memo);

    /* Reset the pointer after freeing so a later dealloc or clear does not
       free the buffer twice. */
    PyMem_Free(self->write_buf);
    self->write_buf = NULL;

    return 0;
}
2662
2663PyDoc_STRVAR(Pickler_doc,
2664"Pickler(file, protocol=None)"
2665"\n"
2666"This takes a binary file for writing a pickle data stream.\n"
2667"\n"
2668"The optional protocol argument tells the pickler to use the\n"
2669"given protocol; supported protocols are 0, 1, 2, 3. The default\n"
2670"protocol is 3; a backward-incompatible protocol designed for\n"
2671"Python 3.0.\n"
2672"\n"
2673"Specifying a negative protocol version selects the highest\n"
2674"protocol version supported. The higher the protocol used, the\n"
2675"more recent the version of Python needed to read the pickle\n"
2676"produced.\n"
2677"\n"
2678"The file argument must have a write() method that accepts a single\n"
2679"bytes argument. It can thus be a file object opened for binary\n"
2680"writing, a io.BytesIO instance, or any other custom object that\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00002681"meets this interface.\n"
2682"\n"
2683"If fix_imports is True and protocol is less than 3, pickle will try to\n"
2684"map the new Python 3.x names to the old module names used in Python\n"
2685"2.x, so that the pickle data stream is readable with Python 2.x.\n");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002686
2687static int
2688Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
2689{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00002690 static char *kwlist[] = {"file", "protocol", "fix_imports", 0};
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002691 PyObject *file;
2692 PyObject *proto_obj = NULL;
2693 long proto = 0;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00002694 int fix_imports = 1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002695
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00002696 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oi:Pickler",
2697 kwlist, &file, &proto_obj, &fix_imports))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002698 return -1;
2699
2700 /* In case of multiple __init__() calls, clear previous content. */
2701 if (self->write != NULL)
2702 (void)Pickler_clear(self);
2703
2704 if (proto_obj == NULL || proto_obj == Py_None)
2705 proto = DEFAULT_PROTOCOL;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00002706 else {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002707 proto = PyLong_AsLong(proto_obj);
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00002708 if (proto == -1 && PyErr_Occurred())
2709 return -1;
2710 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002711
2712 if (proto < 0)
2713 proto = HIGHEST_PROTOCOL;
2714 if (proto > HIGHEST_PROTOCOL) {
2715 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
2716 HIGHEST_PROTOCOL);
2717 return -1;
2718 }
2719
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00002720 self->proto = proto;
2721 self->bin = proto > 0;
2722 self->arg = NULL;
2723 self->fast = 0;
2724 self->fast_nesting = 0;
2725 self->fast_memo = NULL;
2726 self->fix_imports = fix_imports && proto < 3;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002727
2728 if (!PyObject_HasAttrString(file, "write")) {
2729 PyErr_SetString(PyExc_TypeError,
2730 "file must have a 'write' attribute");
2731 return -1;
2732 }
2733 self->write = PyObject_GetAttrString(file, "write");
2734 if (self->write == NULL)
2735 return -1;
2736 self->buf_size = 0;
2737 self->write_buf = (char *)PyMem_Malloc(WRITE_BUF_SIZE);
2738 if (self->write_buf == NULL) {
2739 PyErr_NoMemory();
2740 return -1;
2741 }
2742 self->pers_func = NULL;
2743 if (PyObject_HasAttrString((PyObject *)self, "persistent_id")) {
2744 self->pers_func = PyObject_GetAttrString((PyObject *)self,
2745 "persistent_id");
2746 if (self->pers_func == NULL)
2747 return -1;
2748 }
2749 self->memo = PyDict_New();
2750 if (self->memo == NULL)
2751 return -1;
2752
2753 return 0;
2754}
2755
2756static PyObject *
2757Pickler_get_memo(PicklerObject *self)
2758{
2759 if (self->memo == NULL)
2760 PyErr_SetString(PyExc_AttributeError, "memo");
2761 else
2762 Py_INCREF(self->memo);
2763 return self->memo;
2764}
2765
2766static int
2767Pickler_set_memo(PicklerObject *self, PyObject *value)
2768{
2769 PyObject *tmp;
2770
2771 if (value == NULL) {
2772 PyErr_SetString(PyExc_TypeError,
2773 "attribute deletion is not supported");
2774 return -1;
2775 }
2776 if (!PyDict_Check(value)) {
2777 PyErr_SetString(PyExc_TypeError, "memo must be a dictionary");
2778 return -1;
2779 }
2780
2781 tmp = self->memo;
2782 Py_INCREF(value);
2783 self->memo = value;
2784 Py_XDECREF(tmp);
2785
2786 return 0;
2787}
2788
2789static PyObject *
2790Pickler_get_persid(PicklerObject *self)
2791{
2792 if (self->pers_func == NULL)
2793 PyErr_SetString(PyExc_AttributeError, "persistent_id");
2794 else
2795 Py_INCREF(self->pers_func);
2796 return self->pers_func;
2797}
2798
2799static int
2800Pickler_set_persid(PicklerObject *self, PyObject *value)
2801{
2802 PyObject *tmp;
2803
2804 if (value == NULL) {
2805 PyErr_SetString(PyExc_TypeError,
2806 "attribute deletion is not supported");
2807 return -1;
2808 }
2809 if (!PyCallable_Check(value)) {
2810 PyErr_SetString(PyExc_TypeError,
2811 "persistent_id must be a callable taking one argument");
2812 return -1;
2813 }
2814
2815 tmp = self->pers_func;
2816 Py_INCREF(value);
2817 self->pers_func = value;
2818 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
2819
2820 return 0;
2821}
2822
/* Plain int members of PicklerObject exposed as attributes "bin" and
   "fast". */
static PyMemberDef Pickler_members[] = {
    {"bin", T_INT, offsetof(PicklerObject, bin)},
    {"fast", T_INT, offsetof(PicklerObject, fast)},
    {NULL}                      /* sentinel */
};
2828
/* Computed attributes of Pickler objects: "memo" and "persistent_id",
   each with a getter and a validating setter. */
static PyGetSetDef Pickler_getsets[] = {
    {"memo",          (getter)Pickler_get_memo,
                      (setter)Pickler_set_memo},
    {"persistent_id", (getter)Pickler_get_persid,
                      (setter)Pickler_set_persid},
    {NULL}                      /* sentinel */
};
2836
/* Type object for _pickle.Pickler.  The type is subclassable and takes
   part in cyclic garbage collection (traverse/clear slots are set and the
   object is freed with PyObject_GC_Del). */
static PyTypeObject Pickler_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Pickler"  ,                /*tp_name*/
    sizeof(PicklerObject),              /*tp_basicsize*/
    0,                                  /*tp_itemsize*/
    (destructor)Pickler_dealloc,        /*tp_dealloc*/
    0,                                  /*tp_print*/
    0,                                  /*tp_getattr*/
    0,                                  /*tp_setattr*/
    0,                                  /*tp_reserved*/
    0,                                  /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    0,                                  /*tp_as_mapping*/
    0,                                  /*tp_hash*/
    0,                                  /*tp_call*/
    0,                                  /*tp_str*/
    0,                                  /*tp_getattro*/
    0,                                  /*tp_setattro*/
    0,                                  /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
    Pickler_doc,                        /*tp_doc*/
    (traverseproc)Pickler_traverse,     /*tp_traverse*/
    (inquiry)Pickler_clear,             /*tp_clear*/
    0,                                  /*tp_richcompare*/
    0,                                  /*tp_weaklistoffset*/
    0,                                  /*tp_iter*/
    0,                                  /*tp_iternext*/
    Pickler_methods,                    /*tp_methods*/
    Pickler_members,                    /*tp_members*/
    Pickler_getsets,                    /*tp_getset*/
    0,                                  /*tp_base*/
    0,                                  /*tp_dict*/
    0,                                  /*tp_descr_get*/
    0,                                  /*tp_descr_set*/
    0,                                  /*tp_dictoffset*/
    (initproc)Pickler_init,             /*tp_init*/
    PyType_GenericAlloc,                /*tp_alloc*/
    PyType_GenericNew,                  /*tp_new*/
    PyObject_GC_Del,                    /*tp_free*/
    0,                                  /*tp_is_gc*/
};
2879
2880/* Temporary helper for calling self.find_class().
2881
2882 XXX: It would be nice to able to avoid Python function call overhead, by
2883 using directly the C version of find_class(), when find_class() is not
2884 overridden by a subclass. Although, this could become rather hackish. A
2885 simpler optimization would be to call the C function when self is not a
2886 subclass instance. */
2887static PyObject *
2888find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
2889{
2890 return PyObject_CallMethod((PyObject *)self, "find_class", "OO",
2891 module_name, global_name);
2892}
2893
2894static int
2895marker(UnpicklerObject *self)
2896{
2897 if (self->num_marks < 1) {
2898 PyErr_SetString(UnpicklingError, "could not find MARK");
2899 return -1;
2900 }
2901
2902 return self->marks[--self->num_marks];
2903}
2904
/* Opcode handler: push None onto the unpickler stack.
   NOTE(review): PDATA_APPEND presumably takes its own reference to the
   object and returns its third argument (-1) from this function on
   failure -- confirm against the macro definition. */
static int
load_none(UnpicklerObject *self)
{
    PDATA_APPEND(self->stack, Py_None, -1);
    return 0;
}
2911
/* Set UnpicklingError("pickle data was truncated") and return -1.  The
   line-based loaders call this when a line read from the stream is
   shorter than the opcode requires. */
static int
bad_readline(void)
{
    PyErr_SetString(UnpicklingError, "pickle data was truncated");
    return -1;
}
2918
2919static int
2920load_int(UnpicklerObject *self)
2921{
2922 PyObject *value;
2923 char *endptr, *s;
2924 Py_ssize_t len;
2925 long x;
2926
2927 if ((len = unpickler_readline(self, &s)) < 0)
2928 return -1;
2929 if (len < 2)
2930 return bad_readline();
2931
2932 errno = 0;
2933 /* XXX: Should the base argument of strtol() be explicitly set to 10? */
2934 x = strtol(s, &endptr, 0);
2935
2936 if (errno || (*endptr != '\n') || (endptr[1] != '\0')) {
2937 /* Hm, maybe we've got something long. Let's try reading
2938 * it as a Python long object. */
2939 errno = 0;
2940 /* XXX: Same thing about the base here. */
2941 value = PyLong_FromString(s, NULL, 0);
2942 if (value == NULL) {
2943 PyErr_SetString(PyExc_ValueError,
2944 "could not convert string to int");
2945 return -1;
2946 }
2947 }
2948 else {
2949 if (len == 3 && (x == 0 || x == 1)) {
2950 if ((value = PyBool_FromLong(x)) == NULL)
2951 return -1;
2952 }
2953 else {
2954 if ((value = PyLong_FromLong(x)) == NULL)
2955 return -1;
2956 }
2957 }
2958
2959 PDATA_PUSH(self->stack, value, -1);
2960 return 0;
2961}
2962
/* Opcode handler: push the given bool singleton (Py_True or Py_False, as
   asserted below) onto the unpickler stack. */
static int
load_bool(UnpicklerObject *self, PyObject *boolean)
{
    assert(boolean == Py_True || boolean == Py_False);
    PDATA_APPEND(self->stack, boolean, -1);
    return 0;
}
2970
/* bytes contains size bytes of a little-endian integer.  Return its value
 * as a C long.  Obscure:  when size is 1 or 2, this is an unsigned
 * little-endian int, but when size is 4 it's a signed one.  This is an
 * historical source of x-platform bugs.
 *
 * The value is assembled in an unsigned long so that placing a byte with
 * its top bit set into bits 24..31 is well defined even where long is only
 * 32 bits wide (left-shifting into the sign bit of a signed long is
 * undefined behavior).  Callers in this file pass 1, 2 or 4 for size.
 */
static long
calc_binint(char *bytes, int size)
{
    const unsigned char *s = (const unsigned char *)bytes;
    unsigned long x = 0;
    int i;

    for (i = 0; i < size; i++) {
        x |= (unsigned long)s[i] << (i * 8);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4)
     * is signed: when bit 31 is set the result is the low 31 bits
     * minus 2**31.  Computing it by subtraction gives the same answer
     * whatever the width of long and never overflows.
     */
    if (size == 4 && (x & 0x80000000UL) != 0) {
        return (long)(x & 0x7FFFFFFFUL) - 0x7FFFFFFFL - 1L;
    }

    return (long)x;
}
2997
2998static int
2999load_binintx(UnpicklerObject *self, char *s, int size)
3000{
3001 PyObject *value;
3002 long x;
3003
3004 x = calc_binint(s, size);
3005
3006 if ((value = PyLong_FromLong(x)) == NULL)
3007 return -1;
3008
3009 PDATA_PUSH(self->stack, value, -1);
3010 return 0;
3011}
3012
3013static int
3014load_binint(UnpicklerObject *self)
3015{
3016 char *s;
3017
3018 if (unpickler_read(self, &s, 4) < 0)
3019 return -1;
3020
3021 return load_binintx(self, s, 4);
3022}
3023
3024static int
3025load_binint1(UnpicklerObject *self)
3026{
3027 char *s;
3028
3029 if (unpickler_read(self, &s, 1) < 0)
3030 return -1;
3031
3032 return load_binintx(self, s, 1);
3033}
3034
3035static int
3036load_binint2(UnpicklerObject *self)
3037{
3038 char *s;
3039
3040 if (unpickler_read(self, &s, 2) < 0)
3041 return -1;
3042
3043 return load_binintx(self, s, 2);
3044}
3045
3046static int
3047load_long(UnpicklerObject *self)
3048{
3049 PyObject *value;
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00003050 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003051 Py_ssize_t len;
3052
3053 if ((len = unpickler_readline(self, &s)) < 0)
3054 return -1;
3055 if (len < 2)
3056 return bad_readline();
3057
Mark Dickinson8dd05142009-01-20 20:43:58 +00003058 /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
3059 the 'L' before calling PyLong_FromString. In order to maintain
3060 compatibility with Python 3.0.0, we don't actually *require*
3061 the 'L' to be present. */
3062 if (s[len-2] == 'L') {
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00003063 s[len-2] = '\0';
Mark Dickinson8dd05142009-01-20 20:43:58 +00003064 }
Alexandre Vassalottie4bccb72009-01-24 01:47:57 +00003065 /* XXX: Should the base argument explicitly set to 10? */
3066 value = PyLong_FromString(s, NULL, 0);
Mark Dickinson8dd05142009-01-20 20:43:58 +00003067 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003068 return -1;
3069
3070 PDATA_PUSH(self->stack, value, -1);
3071 return 0;
3072}
3073
3074/* 'size' bytes contain the # of bytes of little-endian 256's-complement
3075 * data following.
3076 */
3077static int
3078load_counted_long(UnpicklerObject *self, int size)
3079{
3080 PyObject *value;
3081 char *nbytes;
3082 char *pdata;
3083
3084 assert(size == 1 || size == 4);
3085 if (unpickler_read(self, &nbytes, size) < 0)
3086 return -1;
3087
3088 size = calc_binint(nbytes, size);
3089 if (size < 0) {
3090 /* Corrupt or hostile pickle -- we never write one like this */
3091 PyErr_SetString(UnpicklingError,
3092 "LONG pickle has negative byte count");
3093 return -1;
3094 }
3095
3096 if (size == 0)
3097 value = PyLong_FromLong(0L);
3098 else {
3099 /* Read the raw little-endian bytes and convert. */
3100 if (unpickler_read(self, &pdata, size) < 0)
3101 return -1;
3102 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
3103 1 /* little endian */ , 1 /* signed */ );
3104 }
3105 if (value == NULL)
3106 return -1;
3107 PDATA_PUSH(self->stack, value, -1);
3108 return 0;
3109}
3110
3111static int
3112load_float(UnpicklerObject *self)
3113{
3114 PyObject *value;
3115 char *endptr, *s;
3116 Py_ssize_t len;
3117 double d;
3118
3119 if ((len = unpickler_readline(self, &s)) < 0)
3120 return -1;
3121 if (len < 2)
3122 return bad_readline();
3123
3124 errno = 0;
Mark Dickinson725bfd82009-05-03 20:33:40 +00003125 d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
3126 if (d == -1.0 && PyErr_Occurred())
3127 return -1;
3128 if ((endptr[0] != '\n') || (endptr[1] != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003129 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
3130 return -1;
3131 }
Mark Dickinson725bfd82009-05-03 20:33:40 +00003132 value = PyFloat_FromDouble(d);
3133 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003134 return -1;
3135
3136 PDATA_PUSH(self->stack, value, -1);
3137 return 0;
Mark Dickinson725bfd82009-05-03 20:33:40 +00003138 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003139
3140static int
3141load_binfloat(UnpicklerObject *self)
3142{
3143 PyObject *value;
3144 double x;
3145 char *s;
3146
3147 if (unpickler_read(self, &s, 8) < 0)
3148 return -1;
3149
3150 x = _PyFloat_Unpack8((unsigned char *)s, 0);
3151 if (x == -1.0 && PyErr_Occurred())
3152 return -1;
3153
3154 if ((value = PyFloat_FromDouble(x)) == NULL)
3155 return -1;
3156
3157 PDATA_PUSH(self->stack, value, -1);
3158 return 0;
3159}
3160
3161static int
3162load_string(UnpicklerObject *self)
3163{
3164 PyObject *bytes;
3165 PyObject *str = NULL;
3166 Py_ssize_t len;
3167 char *s, *p;
3168
3169 if ((len = unpickler_readline(self, &s)) < 0)
3170 return -1;
3171 if (len < 3)
3172 return bad_readline();
3173 if ((s = strdup(s)) == NULL) {
3174 PyErr_NoMemory();
3175 return -1;
3176 }
3177
3178 /* Strip outermost quotes */
3179 while (s[len - 1] <= ' ')
3180 len--;
3181 if (s[0] == '"' && s[len - 1] == '"') {
3182 s[len - 1] = '\0';
3183 p = s + 1;
3184 len -= 2;
3185 }
3186 else if (s[0] == '\'' && s[len - 1] == '\'') {
3187 s[len - 1] = '\0';
3188 p = s + 1;
3189 len -= 2;
3190 }
3191 else {
3192 free(s);
3193 PyErr_SetString(PyExc_ValueError, "insecure string pickle");
3194 return -1;
3195 }
3196
3197 /* Use the PyBytes API to decode the string, since that is what is used
3198 to encode, and then coerce the result to Unicode. */
3199 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
3200 free(s);
3201 if (bytes == NULL)
3202 return -1;
3203 str = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
3204 Py_DECREF(bytes);
3205 if (str == NULL)
3206 return -1;
3207
3208 PDATA_PUSH(self->stack, str, -1);
3209 return 0;
3210}
3211
3212static int
3213load_binbytes(UnpicklerObject *self)
3214{
3215 PyObject *bytes;
3216 long x;
3217 char *s;
3218
3219 if (unpickler_read(self, &s, 4) < 0)
3220 return -1;
3221
3222 x = calc_binint(s, 4);
3223 if (x < 0) {
3224 PyErr_SetString(UnpicklingError,
3225 "BINBYTES pickle has negative byte count");
3226 return -1;
3227 }
3228
3229 if (unpickler_read(self, &s, x) < 0)
3230 return -1;
3231 bytes = PyBytes_FromStringAndSize(s, x);
3232 if (bytes == NULL)
3233 return -1;
3234
3235 PDATA_PUSH(self->stack, bytes, -1);
3236 return 0;
3237}
3238
3239static int
3240load_short_binbytes(UnpicklerObject *self)
3241{
3242 PyObject *bytes;
3243 unsigned char x;
3244 char *s;
3245
3246 if (unpickler_read(self, &s, 1) < 0)
3247 return -1;
3248
3249 x = (unsigned char)s[0];
3250
3251 if (unpickler_read(self, &s, x) < 0)
3252 return -1;
3253
3254 bytes = PyBytes_FromStringAndSize(s, x);
3255 if (bytes == NULL)
3256 return -1;
3257
3258 PDATA_PUSH(self->stack, bytes, -1);
3259 return 0;
3260}
3261
3262static int
3263load_binstring(UnpicklerObject *self)
3264{
3265 PyObject *str;
3266 long x;
3267 char *s;
3268
3269 if (unpickler_read(self, &s, 4) < 0)
3270 return -1;
3271
3272 x = calc_binint(s, 4);
3273 if (x < 0) {
3274 PyErr_SetString(UnpicklingError,
3275 "BINSTRING pickle has negative byte count");
3276 return -1;
3277 }
3278
3279 if (unpickler_read(self, &s, x) < 0)
3280 return -1;
3281
3282 /* Convert Python 2.x strings to unicode. */
3283 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
3284 if (str == NULL)
3285 return -1;
3286
3287 PDATA_PUSH(self->stack, str, -1);
3288 return 0;
3289}
3290
3291static int
3292load_short_binstring(UnpicklerObject *self)
3293{
3294 PyObject *str;
3295 unsigned char x;
3296 char *s;
3297
3298 if (unpickler_read(self, &s, 1) < 0)
3299 return -1;
3300
3301 x = (unsigned char)s[0];
3302
3303 if (unpickler_read(self, &s, x) < 0)
3304 return -1;
3305
3306 /* Convert Python 2.x strings to unicode. */
3307 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
3308 if (str == NULL)
3309 return -1;
3310
3311 PDATA_PUSH(self->stack, str, -1);
3312 return 0;
3313}
3314
3315static int
3316load_unicode(UnpicklerObject *self)
3317{
3318 PyObject *str;
3319 Py_ssize_t len;
3320 char *s;
3321
3322 if ((len = unpickler_readline(self, &s)) < 0)
3323 return -1;
3324 if (len < 1)
3325 return bad_readline();
3326
3327 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
3328 if (str == NULL)
3329 return -1;
3330
3331 PDATA_PUSH(self->stack, str, -1);
3332 return 0;
3333}
3334
3335static int
3336load_binunicode(UnpicklerObject *self)
3337{
3338 PyObject *str;
3339 long size;
3340 char *s;
3341
3342 if (unpickler_read(self, &s, 4) < 0)
3343 return -1;
3344
3345 size = calc_binint(s, 4);
3346 if (size < 0) {
3347 PyErr_SetString(UnpicklingError,
3348 "BINUNICODE pickle has negative byte count");
3349 return -1;
3350 }
3351
3352 if (unpickler_read(self, &s, size) < 0)
3353 return -1;
3354
3355 str = PyUnicode_DecodeUTF8(s, size, NULL);
3356 if (str == NULL)
3357 return -1;
3358
3359 PDATA_PUSH(self->stack, str, -1);
3360 return 0;
3361}
3362
3363static int
3364load_tuple(UnpicklerObject *self)
3365{
3366 PyObject *tuple;
3367 int i;
3368
3369 if ((i = marker(self)) < 0)
3370 return -1;
3371
3372 tuple = Pdata_poptuple(self->stack, i);
3373 if (tuple == NULL)
3374 return -1;
3375 PDATA_PUSH(self->stack, tuple, -1);
3376 return 0;
3377}
3378
3379static int
3380load_counted_tuple(UnpicklerObject *self, int len)
3381{
3382 PyObject *tuple;
3383
3384 tuple = PyTuple_New(len);
3385 if (tuple == NULL)
3386 return -1;
3387
3388 while (--len >= 0) {
3389 PyObject *item;
3390
3391 PDATA_POP(self->stack, item);
3392 if (item == NULL)
3393 return -1;
3394 PyTuple_SET_ITEM(tuple, len, item);
3395 }
3396 PDATA_PUSH(self->stack, tuple, -1);
3397 return 0;
3398}
3399
3400static int
3401load_empty_list(UnpicklerObject *self)
3402{
3403 PyObject *list;
3404
3405 if ((list = PyList_New(0)) == NULL)
3406 return -1;
3407 PDATA_PUSH(self->stack, list, -1);
3408 return 0;
3409}
3410
3411static int
3412load_empty_dict(UnpicklerObject *self)
3413{
3414 PyObject *dict;
3415
3416 if ((dict = PyDict_New()) == NULL)
3417 return -1;
3418 PDATA_PUSH(self->stack, dict, -1);
3419 return 0;
3420}
3421
3422static int
3423load_list(UnpicklerObject *self)
3424{
3425 PyObject *list;
3426 int i;
3427
3428 if ((i = marker(self)) < 0)
3429 return -1;
3430
3431 list = Pdata_poplist(self->stack, i);
3432 if (list == NULL)
3433 return -1;
3434 PDATA_PUSH(self->stack, list, -1);
3435 return 0;
3436}
3437
3438static int
3439load_dict(UnpicklerObject *self)
3440{
3441 PyObject *dict, *key, *value;
3442 int i, j, k;
3443
3444 if ((i = marker(self)) < 0)
3445 return -1;
3446 j = self->stack->length;
3447
3448 if ((dict = PyDict_New()) == NULL)
3449 return -1;
3450
3451 for (k = i + 1; k < j; k += 2) {
3452 key = self->stack->data[k - 1];
3453 value = self->stack->data[k];
3454 if (PyDict_SetItem(dict, key, value) < 0) {
3455 Py_DECREF(dict);
3456 return -1;
3457 }
3458 }
3459 Pdata_clear(self->stack, i);
3460 PDATA_PUSH(self->stack, dict, -1);
3461 return 0;
3462}
3463
3464static PyObject *
3465instantiate(PyObject *cls, PyObject *args)
3466{
3467 PyObject *r = NULL;
3468
3469 /* XXX: The pickle.py module does not create instances this way when the
3470 args tuple is empty. See Unpickler._instantiate(). */
3471 if ((r = PyObject_CallObject(cls, args)))
3472 return r;
3473
3474 /* XXX: Is this still nescessary? */
3475 {
3476 PyObject *tp, *v, *tb, *tmp_value;
3477
3478 PyErr_Fetch(&tp, &v, &tb);
3479 tmp_value = v;
3480 /* NULL occurs when there was a KeyboardInterrupt */
3481 if (tmp_value == NULL)
3482 tmp_value = Py_None;
3483 if ((r = PyTuple_Pack(3, tmp_value, cls, args))) {
3484 Py_XDECREF(v);
3485 v = r;
3486 }
3487 PyErr_Restore(tp, v, tb);
3488 }
3489 return NULL;
3490}
3491
3492static int
3493load_obj(UnpicklerObject *self)
3494{
3495 PyObject *cls, *args, *obj = NULL;
3496 int i;
3497
3498 if ((i = marker(self)) < 0)
3499 return -1;
3500
3501 args = Pdata_poptuple(self->stack, i + 1);
3502 if (args == NULL)
3503 return -1;
3504
3505 PDATA_POP(self->stack, cls);
3506 if (cls) {
3507 obj = instantiate(cls, args);
3508 Py_DECREF(cls);
3509 }
3510 Py_DECREF(args);
3511 if (obj == NULL)
3512 return -1;
3513
3514 PDATA_PUSH(self->stack, obj, -1);
3515 return 0;
3516}
3517
3518static int
3519load_inst(UnpicklerObject *self)
3520{
3521 PyObject *cls = NULL;
3522 PyObject *args = NULL;
3523 PyObject *obj = NULL;
3524 PyObject *module_name;
3525 PyObject *class_name;
3526 Py_ssize_t len;
3527 int i;
3528 char *s;
3529
3530 if ((i = marker(self)) < 0)
3531 return -1;
3532 if ((len = unpickler_readline(self, &s)) < 0)
3533 return -1;
3534 if (len < 2)
3535 return bad_readline();
3536
3537 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
3538 identifiers are permitted in Python 3.0, since the INST opcode is only
3539 supported by older protocols on Python 2.x. */
3540 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
3541 if (module_name == NULL)
3542 return -1;
3543
3544 if ((len = unpickler_readline(self, &s)) >= 0) {
3545 if (len < 2)
3546 return bad_readline();
3547 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
3548 if (class_name == NULL) {
3549 cls = find_class(self, module_name, class_name);
3550 Py_DECREF(class_name);
3551 }
3552 }
3553 Py_DECREF(module_name);
3554
3555 if (cls == NULL)
3556 return -1;
3557
3558 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
3559 obj = instantiate(cls, args);
3560 Py_DECREF(args);
3561 }
3562 Py_DECREF(cls);
3563
3564 if (obj == NULL)
3565 return -1;
3566
3567 PDATA_PUSH(self->stack, obj, -1);
3568 return 0;
3569}
3570
3571static int
3572load_newobj(UnpicklerObject *self)
3573{
3574 PyObject *args = NULL;
3575 PyObject *clsraw = NULL;
3576 PyTypeObject *cls; /* clsraw cast to its true type */
3577 PyObject *obj;
3578
3579 /* Stack is ... cls argtuple, and we want to call
3580 * cls.__new__(cls, *argtuple).
3581 */
3582 PDATA_POP(self->stack, args);
3583 if (args == NULL)
3584 goto error;
3585 if (!PyTuple_Check(args)) {
3586 PyErr_SetString(UnpicklingError, "NEWOBJ expected an arg " "tuple.");
3587 goto error;
3588 }
3589
3590 PDATA_POP(self->stack, clsraw);
3591 cls = (PyTypeObject *)clsraw;
3592 if (cls == NULL)
3593 goto error;
3594 if (!PyType_Check(cls)) {
3595 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
3596 "isn't a type object");
3597 goto error;
3598 }
3599 if (cls->tp_new == NULL) {
3600 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
3601 "has NULL tp_new");
3602 goto error;
3603 }
3604
3605 /* Call __new__. */
3606 obj = cls->tp_new(cls, args, NULL);
3607 if (obj == NULL)
3608 goto error;
3609
3610 Py_DECREF(args);
3611 Py_DECREF(clsraw);
3612 PDATA_PUSH(self->stack, obj, -1);
3613 return 0;
3614
3615 error:
3616 Py_XDECREF(args);
3617 Py_XDECREF(clsraw);
3618 return -1;
3619}
3620
3621static int
3622load_global(UnpicklerObject *self)
3623{
3624 PyObject *global = NULL;
3625 PyObject *module_name;
3626 PyObject *global_name;
3627 Py_ssize_t len;
3628 char *s;
3629
3630 if ((len = unpickler_readline(self, &s)) < 0)
3631 return -1;
3632 if (len < 2)
3633 return bad_readline();
3634 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
3635 if (!module_name)
3636 return -1;
3637
3638 if ((len = unpickler_readline(self, &s)) >= 0) {
3639 if (len < 2) {
3640 Py_DECREF(module_name);
3641 return bad_readline();
3642 }
3643 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
3644 if (global_name) {
3645 global = find_class(self, module_name, global_name);
3646 Py_DECREF(global_name);
3647 }
3648 }
3649 Py_DECREF(module_name);
3650
3651 if (global == NULL)
3652 return -1;
3653 PDATA_PUSH(self->stack, global, -1);
3654 return 0;
3655}
3656
3657static int
3658load_persid(UnpicklerObject *self)
3659{
3660 PyObject *pid;
3661 Py_ssize_t len;
3662 char *s;
3663
3664 if (self->pers_func) {
3665 if ((len = unpickler_readline(self, &s)) < 0)
3666 return -1;
3667 if (len < 2)
3668 return bad_readline();
3669
3670 pid = PyBytes_FromStringAndSize(s, len - 1);
3671 if (pid == NULL)
3672 return -1;
3673
3674 /* Ugh... this does not leak since unpickler_call() steals the
3675 reference to pid first. */
3676 pid = unpickler_call(self, self->pers_func, pid);
3677 if (pid == NULL)
3678 return -1;
3679
3680 PDATA_PUSH(self->stack, pid, -1);
3681 return 0;
3682 }
3683 else {
3684 PyErr_SetString(UnpicklingError,
3685 "A load persistent id instruction was encountered,\n"
3686 "but no persistent_load function was specified.");
3687 return -1;
3688 }
3689}
3690
3691static int
3692load_binpersid(UnpicklerObject *self)
3693{
3694 PyObject *pid;
3695
3696 if (self->pers_func) {
3697 PDATA_POP(self->stack, pid);
3698 if (pid == NULL)
3699 return -1;
3700
3701 /* Ugh... this does not leak since unpickler_call() steals the
3702 reference to pid first. */
3703 pid = unpickler_call(self, self->pers_func, pid);
3704 if (pid == NULL)
3705 return -1;
3706
3707 PDATA_PUSH(self->stack, pid, -1);
3708 return 0;
3709 }
3710 else {
3711 PyErr_SetString(UnpicklingError,
3712 "A load persistent id instruction was encountered,\n"
3713 "but no persistent_load function was specified.");
3714 return -1;
3715 }
3716}
3717
3718static int
3719load_pop(UnpicklerObject *self)
3720{
Collin Winter8ca69de2009-05-26 16:53:41 +00003721 int len = self->stack->length;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003722
3723 /* Note that we split the (pickle.py) stack into two stacks,
3724 * an object stack and a mark stack. We have to be clever and
3725 * pop the right one. We do this by looking at the top of the
Collin Winter8ca69de2009-05-26 16:53:41 +00003726 * mark stack first, and only signalling a stack underflow if
3727 * the object stack is empty and the mark stack doesn't match
3728 * our expectations.
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003729 */
Collin Winter8ca69de2009-05-26 16:53:41 +00003730 if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003731 self->num_marks--;
Antoine Pitrou01a15ea2010-01-07 17:57:31 +00003732 } else if (len > 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003733 len--;
3734 Py_DECREF(self->stack->data[len]);
3735 self->stack->length = len;
Collin Winter8ca69de2009-05-26 16:53:41 +00003736 } else {
3737 return stack_underflow();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003738 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003739 return 0;
3740}
3741
3742static int
3743load_pop_mark(UnpicklerObject *self)
3744{
3745 int i;
3746
3747 if ((i = marker(self)) < 0)
3748 return -1;
3749
3750 Pdata_clear(self->stack, i);
3751
3752 return 0;
3753}
3754
3755static int
3756load_dup(UnpicklerObject *self)
3757{
3758 PyObject *last;
3759 int len;
3760
3761 if ((len = self->stack->length) <= 0)
3762 return stack_underflow();
3763 last = self->stack->data[len - 1];
3764 PDATA_APPEND(self->stack, last, -1);
3765 return 0;
3766}
3767
3768static int
3769load_get(UnpicklerObject *self)
3770{
3771 PyObject *key, *value;
3772 Py_ssize_t len;
3773 char *s;
3774
3775 if ((len = unpickler_readline(self, &s)) < 0)
3776 return -1;
3777 if (len < 2)
3778 return bad_readline();
3779
3780 key = PyLong_FromString(s, NULL, 10);
3781 if (key == NULL)
3782 return -1;
3783
3784 value = PyDict_GetItemWithError(self->memo, key);
3785 if (value == NULL) {
3786 if (!PyErr_Occurred())
3787 PyErr_SetObject(PyExc_KeyError, key);
3788 Py_DECREF(key);
3789 return -1;
3790 }
3791 Py_DECREF(key);
3792
3793 PDATA_APPEND(self->stack, value, -1);
3794 return 0;
3795}
3796
3797static int
3798load_binget(UnpicklerObject *self)
3799{
3800 PyObject *key, *value;
3801 char *s;
3802
3803 if (unpickler_read(self, &s, 1) < 0)
3804 return -1;
3805
3806 /* Here, the unsigned cast is necessary to avoid negative values. */
3807 key = PyLong_FromLong((long)(unsigned char)s[0]);
3808 if (key == NULL)
3809 return -1;
3810
3811 value = PyDict_GetItemWithError(self->memo, key);
3812 if (value == NULL) {
3813 if (!PyErr_Occurred())
3814 PyErr_SetObject(PyExc_KeyError, key);
3815 Py_DECREF(key);
3816 return -1;
3817 }
3818 Py_DECREF(key);
3819
3820 PDATA_APPEND(self->stack, value, -1);
3821 return 0;
3822}
3823
3824static int
3825load_long_binget(UnpicklerObject *self)
3826{
3827 PyObject *key, *value;
3828 char *s;
3829 long k;
3830
3831 if (unpickler_read(self, &s, 4) < 0)
3832 return -1;
3833
3834 k = (long)(unsigned char)s[0];
3835 k |= (long)(unsigned char)s[1] << 8;
3836 k |= (long)(unsigned char)s[2] << 16;
3837 k |= (long)(unsigned char)s[3] << 24;
3838
3839 key = PyLong_FromLong(k);
3840 if (key == NULL)
3841 return -1;
3842
3843 value = PyDict_GetItemWithError(self->memo, key);
3844 if (value == NULL) {
3845 if (!PyErr_Occurred())
3846 PyErr_SetObject(PyExc_KeyError, key);
3847 Py_DECREF(key);
3848 return -1;
3849 }
3850 Py_DECREF(key);
3851
3852 PDATA_APPEND(self->stack, value, -1);
3853 return 0;
3854}
3855
3856/* Push an object from the extension registry (EXT[124]). nbytes is
3857 * the number of bytes following the opcode, holding the index (code) value.
3858 */
3859static int
3860load_extension(UnpicklerObject *self, int nbytes)
3861{
3862 char *codebytes; /* the nbytes bytes after the opcode */
3863 long code; /* calc_binint returns long */
3864 PyObject *py_code; /* code as a Python int */
3865 PyObject *obj; /* the object to push */
3866 PyObject *pair; /* (module_name, class_name) */
3867 PyObject *module_name, *class_name;
3868
3869 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
3870 if (unpickler_read(self, &codebytes, nbytes) < 0)
3871 return -1;
3872 code = calc_binint(codebytes, nbytes);
3873 if (code <= 0) { /* note that 0 is forbidden */
3874 /* Corrupt or hostile pickle. */
3875 PyErr_SetString(UnpicklingError, "EXT specifies code <= 0");
3876 return -1;
3877 }
3878
3879 /* Look for the code in the cache. */
3880 py_code = PyLong_FromLong(code);
3881 if (py_code == NULL)
3882 return -1;
3883 obj = PyDict_GetItem(extension_cache, py_code);
3884 if (obj != NULL) {
3885 /* Bingo. */
3886 Py_DECREF(py_code);
3887 PDATA_APPEND(self->stack, obj, -1);
3888 return 0;
3889 }
3890
3891 /* Look up the (module_name, class_name) pair. */
3892 pair = PyDict_GetItem(inverted_registry, py_code);
3893 if (pair == NULL) {
3894 Py_DECREF(py_code);
3895 PyErr_Format(PyExc_ValueError, "unregistered extension "
3896 "code %ld", code);
3897 return -1;
3898 }
3899 /* Since the extension registry is manipulable via Python code,
3900 * confirm that pair is really a 2-tuple of strings.
3901 */
3902 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2 ||
3903 !PyUnicode_Check(module_name = PyTuple_GET_ITEM(pair, 0)) ||
3904 !PyUnicode_Check(class_name = PyTuple_GET_ITEM(pair, 1))) {
3905 Py_DECREF(py_code);
3906 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
3907 "isn't a 2-tuple of strings", code);
3908 return -1;
3909 }
3910 /* Load the object. */
3911 obj = find_class(self, module_name, class_name);
3912 if (obj == NULL) {
3913 Py_DECREF(py_code);
3914 return -1;
3915 }
3916 /* Cache code -> obj. */
3917 code = PyDict_SetItem(extension_cache, py_code, obj);
3918 Py_DECREF(py_code);
3919 if (code < 0) {
3920 Py_DECREF(obj);
3921 return -1;
3922 }
3923 PDATA_PUSH(self->stack, obj, -1);
3924 return 0;
3925}
3926
3927static int
3928load_put(UnpicklerObject *self)
3929{
3930 PyObject *key, *value;
3931 Py_ssize_t len;
3932 char *s;
3933 int x;
3934
3935 if ((len = unpickler_readline(self, &s)) < 0)
3936 return -1;
3937 if (len < 2)
3938 return bad_readline();
3939 if ((x = self->stack->length) <= 0)
3940 return stack_underflow();
3941
3942 key = PyLong_FromString(s, NULL, 10);
3943 if (key == NULL)
3944 return -1;
3945 value = self->stack->data[x - 1];
3946
3947 x = PyDict_SetItem(self->memo, key, value);
3948 Py_DECREF(key);
3949 return x;
3950}
3951
3952static int
3953load_binput(UnpicklerObject *self)
3954{
3955 PyObject *key, *value;
3956 char *s;
3957 int x;
3958
3959 if (unpickler_read(self, &s, 1) < 0)
3960 return -1;
3961 if ((x = self->stack->length) <= 0)
3962 return stack_underflow();
3963
3964 key = PyLong_FromLong((long)(unsigned char)s[0]);
3965 if (key == NULL)
3966 return -1;
3967 value = self->stack->data[x - 1];
3968
3969 x = PyDict_SetItem(self->memo, key, value);
3970 Py_DECREF(key);
3971 return x;
3972}
3973
3974static int
3975load_long_binput(UnpicklerObject *self)
3976{
3977 PyObject *key, *value;
3978 long k;
3979 char *s;
3980 int x;
3981
3982 if (unpickler_read(self, &s, 4) < 0)
3983 return -1;
3984 if ((x = self->stack->length) <= 0)
3985 return stack_underflow();
3986
3987 k = (long)(unsigned char)s[0];
3988 k |= (long)(unsigned char)s[1] << 8;
3989 k |= (long)(unsigned char)s[2] << 16;
3990 k |= (long)(unsigned char)s[3] << 24;
3991
3992 key = PyLong_FromLong(k);
3993 if (key == NULL)
3994 return -1;
3995 value = self->stack->data[x - 1];
3996
3997 x = PyDict_SetItem(self->memo, key, value);
3998 Py_DECREF(key);
3999 return x;
4000}
4001
4002static int
4003do_append(UnpicklerObject *self, int x)
4004{
4005 PyObject *value;
4006 PyObject *list;
4007 int len, i;
4008
4009 len = self->stack->length;
4010 if (x > len || x <= 0)
4011 return stack_underflow();
4012 if (len == x) /* nothing to do */
4013 return 0;
4014
4015 list = self->stack->data[x - 1];
4016
4017 if (PyList_Check(list)) {
4018 PyObject *slice;
4019 Py_ssize_t list_len;
4020
4021 slice = Pdata_poplist(self->stack, x);
4022 if (!slice)
4023 return -1;
4024 list_len = PyList_GET_SIZE(list);
4025 i = PyList_SetSlice(list, list_len, list_len, slice);
4026 Py_DECREF(slice);
4027 return i;
4028 }
4029 else {
4030 PyObject *append_func;
4031
4032 append_func = PyObject_GetAttrString(list, "append");
4033 if (append_func == NULL)
4034 return -1;
4035 for (i = x; i < len; i++) {
4036 PyObject *result;
4037
4038 value = self->stack->data[i];
4039 result = unpickler_call(self, append_func, value);
4040 if (result == NULL) {
4041 Pdata_clear(self->stack, i + 1);
4042 self->stack->length = x;
4043 return -1;
4044 }
4045 Py_DECREF(result);
4046 }
4047 self->stack->length = x;
4048 }
4049
4050 return 0;
4051}
4052
4053static int
4054load_append(UnpicklerObject *self)
4055{
4056 return do_append(self, self->stack->length - 1);
4057}
4058
4059static int
4060load_appends(UnpicklerObject *self)
4061{
4062 return do_append(self, marker(self));
4063}
4064
4065static int
4066do_setitems(UnpicklerObject *self, int x)
4067{
4068 PyObject *value, *key;
4069 PyObject *dict;
4070 int len, i;
4071 int status = 0;
4072
4073 len = self->stack->length;
4074 if (x > len || x <= 0)
4075 return stack_underflow();
4076 if (len == x) /* nothing to do */
4077 return 0;
4078 if ((len - x) % 2 != 0) {
4079 /* Currupt or hostile pickle -- we never write one like this. */
4080 PyErr_SetString(UnpicklingError, "odd number of items for SETITEMS");
4081 return -1;
4082 }
4083
4084 /* Here, dict does not actually need to be a PyDict; it could be anything
4085 that supports the __setitem__ attribute. */
4086 dict = self->stack->data[x - 1];
4087
4088 for (i = x + 1; i < len; i += 2) {
4089 key = self->stack->data[i - 1];
4090 value = self->stack->data[i];
4091 if (PyObject_SetItem(dict, key, value) < 0) {
4092 status = -1;
4093 break;
4094 }
4095 }
4096
4097 Pdata_clear(self->stack, x);
4098 return status;
4099}
4100
4101static int
4102load_setitem(UnpicklerObject *self)
4103{
4104 return do_setitems(self, self->stack->length - 2);
4105}
4106
4107static int
4108load_setitems(UnpicklerObject *self)
4109{
4110 return do_setitems(self, marker(self));
4111}
4112
4113static int
4114load_build(UnpicklerObject *self)
4115{
4116 PyObject *state, *inst, *slotstate;
4117 PyObject *setstate;
4118 int status = 0;
4119
4120 /* Stack is ... instance, state. We want to leave instance at
4121 * the stack top, possibly mutated via instance.__setstate__(state).
4122 */
4123 if (self->stack->length < 2)
4124 return stack_underflow();
4125
4126 PDATA_POP(self->stack, state);
4127 if (state == NULL)
4128 return -1;
4129
4130 inst = self->stack->data[self->stack->length - 1];
4131
4132 setstate = PyObject_GetAttrString(inst, "__setstate__");
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00004133 if (setstate == NULL) {
4134 if (PyErr_ExceptionMatches(PyExc_AttributeError))
4135 PyErr_Clear();
Antoine Pitroud79dc622008-09-05 00:03:33 +00004136 else {
4137 Py_DECREF(state);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00004138 return -1;
Antoine Pitroud79dc622008-09-05 00:03:33 +00004139 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004140 }
4141 else {
4142 PyObject *result;
4143
4144 /* The explicit __setstate__ is responsible for everything. */
Antoine Pitroud79dc622008-09-05 00:03:33 +00004145 /* Ugh... this does not leak since unpickler_call() steals the
4146 reference to state first. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004147 result = unpickler_call(self, setstate, state);
4148 Py_DECREF(setstate);
4149 if (result == NULL)
4150 return -1;
4151 Py_DECREF(result);
4152 return 0;
4153 }
4154
4155 /* A default __setstate__. First see whether state embeds a
4156 * slot state dict too (a proto 2 addition).
4157 */
4158 if (PyTuple_Check(state) && Py_SIZE(state) == 2) {
4159 PyObject *tmp = state;
4160
4161 state = PyTuple_GET_ITEM(tmp, 0);
4162 slotstate = PyTuple_GET_ITEM(tmp, 1);
4163 Py_INCREF(state);
4164 Py_INCREF(slotstate);
4165 Py_DECREF(tmp);
4166 }
4167 else
4168 slotstate = NULL;
4169
4170 /* Set inst.__dict__ from the state dict (if any). */
4171 if (state != Py_None) {
4172 PyObject *dict;
Antoine Pitroua9f48a02009-05-02 21:41:14 +00004173 PyObject *d_key, *d_value;
4174 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004175
4176 if (!PyDict_Check(state)) {
4177 PyErr_SetString(UnpicklingError, "state is not a dictionary");
4178 goto error;
4179 }
4180 dict = PyObject_GetAttrString(inst, "__dict__");
4181 if (dict == NULL)
4182 goto error;
4183
Antoine Pitroua9f48a02009-05-02 21:41:14 +00004184 i = 0;
4185 while (PyDict_Next(state, &i, &d_key, &d_value)) {
4186 /* normally the keys for instance attributes are
4187 interned. we should try to do that here. */
4188 Py_INCREF(d_key);
4189 if (PyUnicode_CheckExact(d_key))
4190 PyUnicode_InternInPlace(&d_key);
4191 if (PyObject_SetItem(dict, d_key, d_value) < 0) {
4192 Py_DECREF(d_key);
4193 goto error;
4194 }
4195 Py_DECREF(d_key);
4196 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004197 Py_DECREF(dict);
4198 }
4199
4200 /* Also set instance attributes from the slotstate dict (if any). */
4201 if (slotstate != NULL) {
4202 PyObject *d_key, *d_value;
4203 Py_ssize_t i;
4204
4205 if (!PyDict_Check(slotstate)) {
4206 PyErr_SetString(UnpicklingError,
4207 "slot state is not a dictionary");
4208 goto error;
4209 }
4210 i = 0;
4211 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
4212 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
4213 goto error;
4214 }
4215 }
4216
4217 if (0) {
4218 error:
4219 status = -1;
4220 }
4221
4222 Py_DECREF(state);
4223 Py_XDECREF(slotstate);
4224 return status;
4225}
4226
4227static int
4228load_mark(UnpicklerObject *self)
4229{
4230
4231 /* Note that we split the (pickle.py) stack into two stacks, an
4232 * object stack and a mark stack. Here we push a mark onto the
4233 * mark stack.
4234 */
4235
4236 if ((self->num_marks + 1) >= self->marks_size) {
4237 size_t alloc;
4238 int *marks;
4239
4240 /* Use the size_t type to check for overflow. */
4241 alloc = ((size_t)self->num_marks << 1) + 20;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00004242 if (alloc > PY_SSIZE_T_MAX ||
4243 alloc <= ((size_t)self->num_marks + 1)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004244 PyErr_NoMemory();
4245 return -1;
4246 }
4247
4248 if (self->marks == NULL)
4249 marks = (int *)PyMem_Malloc(alloc * sizeof(int));
4250 else
4251 marks = (int *)PyMem_Realloc(self->marks, alloc * sizeof(int));
4252 if (marks == NULL) {
4253 PyErr_NoMemory();
4254 return -1;
4255 }
4256 self->marks = marks;
4257 self->marks_size = (Py_ssize_t)alloc;
4258 }
4259
4260 self->marks[self->num_marks++] = self->stack->length;
4261
4262 return 0;
4263}
4264
4265static int
4266load_reduce(UnpicklerObject *self)
4267{
4268 PyObject *callable = NULL;
4269 PyObject *argtup = NULL;
4270 PyObject *obj = NULL;
4271
4272 PDATA_POP(self->stack, argtup);
4273 if (argtup == NULL)
4274 return -1;
4275 PDATA_POP(self->stack, callable);
4276 if (callable) {
4277 obj = instantiate(callable, argtup);
4278 Py_DECREF(callable);
4279 }
4280 Py_DECREF(argtup);
4281
4282 if (obj == NULL)
4283 return -1;
4284
4285 PDATA_PUSH(self->stack, obj, -1);
4286 return 0;
4287}
4288
4289/* Just raises an error if we don't know the protocol specified. PROTO
4290 * is the first opcode for protocols >= 2.
4291 */
4292static int
4293load_proto(UnpicklerObject *self)
4294{
4295 char *s;
4296 int i;
4297
4298 if (unpickler_read(self, &s, 1) < 0)
4299 return -1;
4300
4301 i = (unsigned char)s[0];
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004302 if (i <= HIGHEST_PROTOCOL) {
4303 self->proto = i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004304 return 0;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004305 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004306
4307 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
4308 return -1;
4309}
4310
4311static PyObject *
4312load(UnpicklerObject *self)
4313{
4314 PyObject *err;
4315 PyObject *value = NULL;
4316 char *s;
4317
4318 self->num_marks = 0;
4319 if (self->stack->length)
4320 Pdata_clear(self->stack, 0);
4321
4322 /* Convenient macros for the dispatch while-switch loop just below. */
4323#define OP(opcode, load_func) \
4324 case opcode: if (load_func(self) < 0) break; continue;
4325
4326#define OP_ARG(opcode, load_func, arg) \
4327 case opcode: if (load_func(self, (arg)) < 0) break; continue;
4328
4329 while (1) {
4330 if (unpickler_read(self, &s, 1) < 0)
4331 break;
4332
4333 switch ((enum opcode)s[0]) {
4334 OP(NONE, load_none)
4335 OP(BININT, load_binint)
4336 OP(BININT1, load_binint1)
4337 OP(BININT2, load_binint2)
4338 OP(INT, load_int)
4339 OP(LONG, load_long)
4340 OP_ARG(LONG1, load_counted_long, 1)
4341 OP_ARG(LONG4, load_counted_long, 4)
4342 OP(FLOAT, load_float)
4343 OP(BINFLOAT, load_binfloat)
4344 OP(BINBYTES, load_binbytes)
4345 OP(SHORT_BINBYTES, load_short_binbytes)
4346 OP(BINSTRING, load_binstring)
4347 OP(SHORT_BINSTRING, load_short_binstring)
4348 OP(STRING, load_string)
4349 OP(UNICODE, load_unicode)
4350 OP(BINUNICODE, load_binunicode)
4351 OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
4352 OP_ARG(TUPLE1, load_counted_tuple, 1)
4353 OP_ARG(TUPLE2, load_counted_tuple, 2)
4354 OP_ARG(TUPLE3, load_counted_tuple, 3)
4355 OP(TUPLE, load_tuple)
4356 OP(EMPTY_LIST, load_empty_list)
4357 OP(LIST, load_list)
4358 OP(EMPTY_DICT, load_empty_dict)
4359 OP(DICT, load_dict)
4360 OP(OBJ, load_obj)
4361 OP(INST, load_inst)
4362 OP(NEWOBJ, load_newobj)
4363 OP(GLOBAL, load_global)
4364 OP(APPEND, load_append)
4365 OP(APPENDS, load_appends)
4366 OP(BUILD, load_build)
4367 OP(DUP, load_dup)
4368 OP(BINGET, load_binget)
4369 OP(LONG_BINGET, load_long_binget)
4370 OP(GET, load_get)
4371 OP(MARK, load_mark)
4372 OP(BINPUT, load_binput)
4373 OP(LONG_BINPUT, load_long_binput)
4374 OP(PUT, load_put)
4375 OP(POP, load_pop)
4376 OP(POP_MARK, load_pop_mark)
4377 OP(SETITEM, load_setitem)
4378 OP(SETITEMS, load_setitems)
4379 OP(PERSID, load_persid)
4380 OP(BINPERSID, load_binpersid)
4381 OP(REDUCE, load_reduce)
4382 OP(PROTO, load_proto)
4383 OP_ARG(EXT1, load_extension, 1)
4384 OP_ARG(EXT2, load_extension, 2)
4385 OP_ARG(EXT4, load_extension, 4)
4386 OP_ARG(NEWTRUE, load_bool, Py_True)
4387 OP_ARG(NEWFALSE, load_bool, Py_False)
4388
4389 case STOP:
4390 break;
4391
4392 case '\0':
4393 PyErr_SetNone(PyExc_EOFError);
4394 return NULL;
4395
4396 default:
4397 PyErr_Format(UnpicklingError,
4398 "invalid load key, '%c'.", s[0]);
4399 return NULL;
4400 }
4401
4402 break; /* and we are done! */
4403 }
4404
4405 /* XXX: It is not clear what this is actually for. */
4406 if ((err = PyErr_Occurred())) {
4407 if (err == PyExc_EOFError) {
4408 PyErr_SetNone(PyExc_EOFError);
4409 }
4410 return NULL;
4411 }
4412
4413 PDATA_POP(self->stack, value);
4414 return value;
4415}
4416
4417PyDoc_STRVAR(Unpickler_load_doc,
4418"load() -> object. Load a pickle."
4419"\n"
4420"Read a pickled object representation from the open file object given in\n"
4421"the constructor, and return the reconstituted object hierarchy specified\n"
4422"therein.\n");
4423
4424static PyObject *
4425Unpickler_load(UnpicklerObject *self)
4426{
4427 /* Check whether the Unpickler was initialized correctly. This prevents
4428 segfaulting if a subclass overridden __init__ with a function that does
4429 not call Unpickler.__init__(). Here, we simply ensure that self->read
4430 is not NULL. */
4431 if (self->read == NULL) {
4432 PyErr_Format(UnpicklingError,
4433 "Unpickler.__init__() was not called by %s.__init__()",
4434 Py_TYPE(self)->tp_name);
4435 return NULL;
4436 }
4437
4438 return load(self);
4439}
4440
4441/* The name of find_class() is misleading. In newer pickle protocols, this
4442 function is used for loading any global (i.e., functions), not just
4443 classes. The name is kept only for backward compatibility. */
4444
4445PyDoc_STRVAR(Unpickler_find_class_doc,
4446"find_class(module_name, global_name) -> object.\n"
4447"\n"
4448"Return an object from a specified module, importing the module if\n"
4449"necessary. Subclasses may override this method (e.g. to restrict\n"
4450"unpickling of arbitrary classes and functions).\n"
4451"\n"
4452"This method is called whenever a class or a function object is\n"
4453"needed. Both arguments passed are str objects.\n");
4454
4455static PyObject *
4456Unpickler_find_class(UnpicklerObject *self, PyObject *args)
4457{
4458 PyObject *global;
4459 PyObject *modules_dict;
4460 PyObject *module;
4461 PyObject *module_name, *global_name;
4462
4463 if (!PyArg_UnpackTuple(args, "find_class", 2, 2,
4464 &module_name, &global_name))
4465 return NULL;
4466
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004467 /* Try to map the old names used in Python 2.x to the new ones used in
4468 Python 3.x. We do this only with old pickle protocols and when the
4469 user has not disabled the feature. */
4470 if (self->proto < 3 && self->fix_imports) {
4471 PyObject *key;
4472 PyObject *item;
4473
4474 /* Check if the global (i.e., a function or a class) was renamed
4475 or moved to another module. */
4476 key = PyTuple_Pack(2, module_name, global_name);
4477 if (key == NULL)
4478 return NULL;
4479 item = PyDict_GetItemWithError(name_mapping_2to3, key);
4480 Py_DECREF(key);
4481 if (item) {
4482 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
4483 PyErr_Format(PyExc_RuntimeError,
4484 "_compat_pickle.NAME_MAPPING values should be "
4485 "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
4486 return NULL;
4487 }
4488 module_name = PyTuple_GET_ITEM(item, 0);
4489 global_name = PyTuple_GET_ITEM(item, 1);
4490 if (!PyUnicode_Check(module_name) ||
4491 !PyUnicode_Check(global_name)) {
4492 PyErr_Format(PyExc_RuntimeError,
4493 "_compat_pickle.NAME_MAPPING values should be "
4494 "pairs of str, not (%.200s, %.200s)",
4495 Py_TYPE(module_name)->tp_name,
4496 Py_TYPE(global_name)->tp_name);
4497 return NULL;
4498 }
4499 }
4500 else if (PyErr_Occurred()) {
4501 return NULL;
4502 }
4503
4504 /* Check if the module was renamed. */
4505 item = PyDict_GetItemWithError(import_mapping_2to3, module_name);
4506 if (item) {
4507 if (!PyUnicode_Check(item)) {
4508 PyErr_Format(PyExc_RuntimeError,
4509 "_compat_pickle.IMPORT_MAPPING values should be "
4510 "strings, not %.200s", Py_TYPE(item)->tp_name);
4511 return NULL;
4512 }
4513 module_name = item;
4514 }
4515 else if (PyErr_Occurred()) {
4516 return NULL;
4517 }
4518 }
4519
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004520 modules_dict = PySys_GetObject("modules");
4521 if (modules_dict == NULL)
4522 return NULL;
4523
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004524 module = PyDict_GetItemWithError(modules_dict, module_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004525 if (module == NULL) {
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004526 if (PyErr_Occurred())
4527 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004528 module = PyImport_Import(module_name);
4529 if (module == NULL)
4530 return NULL;
4531 global = PyObject_GetAttr(module, global_name);
4532 Py_DECREF(module);
4533 }
4534 else {
4535 global = PyObject_GetAttr(module, global_name);
4536 }
4537 return global;
4538}
4539
4540static struct PyMethodDef Unpickler_methods[] = {
4541 {"load", (PyCFunction)Unpickler_load, METH_NOARGS,
4542 Unpickler_load_doc},
4543 {"find_class", (PyCFunction)Unpickler_find_class, METH_VARARGS,
4544 Unpickler_find_class_doc},
4545 {NULL, NULL} /* sentinel */
4546};
4547
4548static void
4549Unpickler_dealloc(UnpicklerObject *self)
4550{
4551 PyObject_GC_UnTrack((PyObject *)self);
4552 Py_XDECREF(self->readline);
4553 Py_XDECREF(self->read);
4554 Py_XDECREF(self->memo);
4555 Py_XDECREF(self->stack);
4556 Py_XDECREF(self->pers_func);
4557 Py_XDECREF(self->arg);
4558 Py_XDECREF(self->last_string);
4559
4560 PyMem_Free(self->marks);
4561 free(self->encoding);
4562 free(self->errors);
4563
4564 Py_TYPE(self)->tp_free((PyObject *)self);
4565}
4566
4567static int
4568Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
4569{
4570 Py_VISIT(self->readline);
4571 Py_VISIT(self->read);
4572 Py_VISIT(self->memo);
4573 Py_VISIT(self->stack);
4574 Py_VISIT(self->pers_func);
4575 Py_VISIT(self->arg);
4576 Py_VISIT(self->last_string);
4577 return 0;
4578}
4579
4580static int
4581Unpickler_clear(UnpicklerObject *self)
4582{
4583 Py_CLEAR(self->readline);
4584 Py_CLEAR(self->read);
4585 Py_CLEAR(self->memo);
4586 Py_CLEAR(self->stack);
4587 Py_CLEAR(self->pers_func);
4588 Py_CLEAR(self->arg);
4589 Py_CLEAR(self->last_string);
4590
4591 PyMem_Free(self->marks);
4592 self->marks = NULL;
4593 free(self->encoding);
4594 self->encoding = NULL;
4595 free(self->errors);
4596 self->errors = NULL;
4597
4598 return 0;
4599}
4600
4601PyDoc_STRVAR(Unpickler_doc,
4602"Unpickler(file, *, encoding='ASCII', errors='strict')"
4603"\n"
4604"This takes a binary file for reading a pickle data stream.\n"
4605"\n"
4606"The protocol version of the pickle is detected automatically, so no\n"
4607"proto argument is needed.\n"
4608"\n"
4609"The file-like object must have two methods, a read() method\n"
4610"that takes an integer argument, and a readline() method that\n"
4611"requires no arguments. Both methods should return bytes.\n"
4612"Thus file-like object can be a binary file object opened for\n"
4613"reading, a BytesIO object, or any other custom object that\n"
4614"meets this interface.\n"
4615"\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004616"Optional keyword arguments are *fix_imports*, *encoding* and *errors*,\n"
4617"which are used to control compatiblity support for pickle stream\n"
4618"generated by Python 2.x. If *fix_imports* is True, pickle will try to\n"
4619"map the old Python 2.x names to the new names used in Python 3.x. The\n"
4620"*encoding* and *errors* tell pickle how to decode 8-bit string\n"
4621"instances pickled by Python 2.x; these default to 'ASCII' and\n"
4622"'strict', respectively.\n");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004623
4624static int
4625Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
4626{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004627 static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004628 PyObject *file;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004629 int fix_imports = 1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004630 char *encoding = NULL;
4631 char *errors = NULL;
4632
4633 /* XXX: That is an horrible error message. But, I don't know how to do
4634 better... */
4635 if (Py_SIZE(args) != 1) {
4636 PyErr_Format(PyExc_TypeError,
4637 "%s takes exactly one positional argument (%zd given)",
4638 Py_TYPE(self)->tp_name, Py_SIZE(args));
4639 return -1;
4640 }
4641
4642 /* Arguments parsing needs to be done in the __init__() method to allow
4643 subclasses to define their own __init__() method, which may (or may
4644 not) support Unpickler arguments. However, this means we need to be
4645 extra careful in the other Unpickler methods, since a subclass could
4646 forget to call Unpickler.__init__() thus breaking our internal
4647 invariants. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004648 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|iss:Unpickler", kwlist,
4649 &file, &fix_imports, &encoding, &errors))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004650 return -1;
4651
4652 /* In case of multiple __init__() calls, clear previous content. */
4653 if (self->read != NULL)
4654 (void)Unpickler_clear(self);
4655
4656 self->read = PyObject_GetAttrString(file, "read");
4657 self->readline = PyObject_GetAttrString(file, "readline");
4658 if (self->readline == NULL || self->read == NULL)
4659 return -1;
4660
4661 if (encoding == NULL)
4662 encoding = "ASCII";
4663 if (errors == NULL)
4664 errors = "strict";
4665
4666 self->encoding = strdup(encoding);
4667 self->errors = strdup(errors);
4668 if (self->encoding == NULL || self->errors == NULL) {
4669 PyErr_NoMemory();
4670 return -1;
4671 }
4672
4673 if (PyObject_HasAttrString((PyObject *)self, "persistent_load")) {
4674 self->pers_func = PyObject_GetAttrString((PyObject *)self,
4675 "persistent_load");
4676 if (self->pers_func == NULL)
4677 return -1;
4678 }
4679 else {
4680 self->pers_func = NULL;
4681 }
4682
4683 self->stack = (Pdata *)Pdata_New();
4684 if (self->stack == NULL)
4685 return -1;
4686
4687 self->memo = PyDict_New();
4688 if (self->memo == NULL)
4689 return -1;
4690
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00004691 self->last_string = NULL;
4692 self->arg = NULL;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004693 self->proto = 0;
4694 self->fix_imports = fix_imports;
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00004695
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004696 return 0;
4697}
4698
4699static PyObject *
4700Unpickler_get_memo(UnpicklerObject *self)
4701{
4702 if (self->memo == NULL)
4703 PyErr_SetString(PyExc_AttributeError, "memo");
4704 else
4705 Py_INCREF(self->memo);
4706 return self->memo;
4707}
4708
4709static int
4710Unpickler_set_memo(UnpicklerObject *self, PyObject *value)
4711{
4712 PyObject *tmp;
4713
4714 if (value == NULL) {
4715 PyErr_SetString(PyExc_TypeError,
4716 "attribute deletion is not supported");
4717 return -1;
4718 }
4719 if (!PyDict_Check(value)) {
4720 PyErr_SetString(PyExc_TypeError, "memo must be a dictionary");
4721 return -1;
4722 }
4723
4724 tmp = self->memo;
4725 Py_INCREF(value);
4726 self->memo = value;
4727 Py_XDECREF(tmp);
4728
4729 return 0;
4730}
4731
4732static PyObject *
4733Unpickler_get_persload(UnpicklerObject *self)
4734{
4735 if (self->pers_func == NULL)
4736 PyErr_SetString(PyExc_AttributeError, "persistent_load");
4737 else
4738 Py_INCREF(self->pers_func);
4739 return self->pers_func;
4740}
4741
4742static int
4743Unpickler_set_persload(UnpicklerObject *self, PyObject *value)
4744{
4745 PyObject *tmp;
4746
4747 if (value == NULL) {
4748 PyErr_SetString(PyExc_TypeError,
4749 "attribute deletion is not supported");
4750 return -1;
4751 }
4752 if (!PyCallable_Check(value)) {
4753 PyErr_SetString(PyExc_TypeError,
4754 "persistent_load must be a callable taking "
4755 "one argument");
4756 return -1;
4757 }
4758
4759 tmp = self->pers_func;
4760 Py_INCREF(value);
4761 self->pers_func = value;
4762 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
4763
4764 return 0;
4765}
4766
4767static PyGetSetDef Unpickler_getsets[] = {
4768 {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
4769 {"persistent_load", (getter)Unpickler_get_persload,
4770 (setter)Unpickler_set_persload},
4771 {NULL}
4772};
4773
4774static PyTypeObject Unpickler_Type = {
4775 PyVarObject_HEAD_INIT(NULL, 0)
4776 "_pickle.Unpickler", /*tp_name*/
4777 sizeof(UnpicklerObject), /*tp_basicsize*/
4778 0, /*tp_itemsize*/
4779 (destructor)Unpickler_dealloc, /*tp_dealloc*/
4780 0, /*tp_print*/
4781 0, /*tp_getattr*/
4782 0, /*tp_setattr*/
Mark Dickinsone94c6792009-02-02 20:36:42 +00004783 0, /*tp_reserved*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004784 0, /*tp_repr*/
4785 0, /*tp_as_number*/
4786 0, /*tp_as_sequence*/
4787 0, /*tp_as_mapping*/
4788 0, /*tp_hash*/
4789 0, /*tp_call*/
4790 0, /*tp_str*/
4791 0, /*tp_getattro*/
4792 0, /*tp_setattro*/
4793 0, /*tp_as_buffer*/
4794 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4795 Unpickler_doc, /*tp_doc*/
4796 (traverseproc)Unpickler_traverse, /*tp_traverse*/
4797 (inquiry)Unpickler_clear, /*tp_clear*/
4798 0, /*tp_richcompare*/
4799 0, /*tp_weaklistoffset*/
4800 0, /*tp_iter*/
4801 0, /*tp_iternext*/
4802 Unpickler_methods, /*tp_methods*/
4803 0, /*tp_members*/
4804 Unpickler_getsets, /*tp_getset*/
4805 0, /*tp_base*/
4806 0, /*tp_dict*/
4807 0, /*tp_descr_get*/
4808 0, /*tp_descr_set*/
4809 0, /*tp_dictoffset*/
4810 (initproc)Unpickler_init, /*tp_init*/
4811 PyType_GenericAlloc, /*tp_alloc*/
4812 PyType_GenericNew, /*tp_new*/
4813 PyObject_GC_Del, /*tp_free*/
4814 0, /*tp_is_gc*/
4815};
4816
4817static int
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004818initmodule(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004819{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004820 PyObject *copyreg = NULL;
4821 PyObject *compat_pickle = NULL;
4822
4823 /* XXX: We should ensure that the types of the dictionaries imported are
4824 exactly PyDict objects. Otherwise, it is possible to crash the pickle
4825 since we use the PyDict API directly to access these dictionaries. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004826
4827 copyreg = PyImport_ImportModule("copyreg");
4828 if (!copyreg)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004829 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004830 dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
4831 if (!dispatch_table)
4832 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004833 extension_registry = \
4834 PyObject_GetAttrString(copyreg, "_extension_registry");
4835 if (!extension_registry)
4836 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004837 inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry");
4838 if (!inverted_registry)
4839 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004840 extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
4841 if (!extension_cache)
4842 goto error;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004843 Py_CLEAR(copyreg);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004844
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004845 /* Load the 2.x -> 3.x stdlib module mapping tables */
4846 compat_pickle = PyImport_ImportModule("_compat_pickle");
4847 if (!compat_pickle)
4848 goto error;
4849 name_mapping_2to3 = PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
4850 if (!name_mapping_2to3)
4851 goto error;
4852 if (!PyDict_CheckExact(name_mapping_2to3)) {
4853 PyErr_Format(PyExc_RuntimeError,
4854 "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
4855 Py_TYPE(name_mapping_2to3)->tp_name);
4856 goto error;
4857 }
4858 import_mapping_2to3 = PyObject_GetAttrString(compat_pickle,
4859 "IMPORT_MAPPING");
4860 if (!import_mapping_2to3)
4861 goto error;
4862 if (!PyDict_CheckExact(import_mapping_2to3)) {
4863 PyErr_Format(PyExc_RuntimeError,
4864 "_compat_pickle.IMPORT_MAPPING should be a dict, "
4865 "not %.200s", Py_TYPE(import_mapping_2to3)->tp_name);
4866 goto error;
4867 }
4868 /* ... and the 3.x -> 2.x mapping tables */
4869 name_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
4870 "REVERSE_NAME_MAPPING");
4871 if (!name_mapping_3to2)
4872 goto error;
4873 if (!PyDict_CheckExact(name_mapping_3to2)) {
4874 PyErr_Format(PyExc_RuntimeError,
4875 "_compat_pickle.REVERSE_NAME_MAPPING shouldbe a dict, "
4876 "not %.200s", Py_TYPE(name_mapping_3to2)->tp_name);
4877 goto error;
4878 }
4879 import_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
4880 "REVERSE_IMPORT_MAPPING");
4881 if (!import_mapping_3to2)
4882 goto error;
4883 if (!PyDict_CheckExact(import_mapping_3to2)) {
4884 PyErr_Format(PyExc_RuntimeError,
4885 "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
4886 "not %.200s", Py_TYPE(import_mapping_3to2)->tp_name);
4887 goto error;
4888 }
4889 Py_CLEAR(compat_pickle);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004890
4891 empty_tuple = PyTuple_New(0);
4892 if (empty_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004893 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004894 two_tuple = PyTuple_New(2);
4895 if (two_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004896 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004897 /* We use this temp container with no regard to refcounts, or to
4898 * keeping containees alive. Exempt from GC, because we don't
4899 * want anything looking at two_tuple() by magic.
4900 */
4901 PyObject_GC_UnTrack(two_tuple);
4902
4903 return 0;
4904
4905 error:
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004906 Py_CLEAR(copyreg);
4907 Py_CLEAR(dispatch_table);
4908 Py_CLEAR(extension_registry);
4909 Py_CLEAR(inverted_registry);
4910 Py_CLEAR(extension_cache);
4911 Py_CLEAR(compat_pickle);
4912 Py_CLEAR(name_mapping_2to3);
4913 Py_CLEAR(import_mapping_2to3);
4914 Py_CLEAR(name_mapping_3to2);
4915 Py_CLEAR(import_mapping_3to2);
4916 Py_CLEAR(empty_tuple);
4917 Py_CLEAR(two_tuple);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004918 return -1;
4919}
4920
4921static struct PyModuleDef _picklemodule = {
4922 PyModuleDef_HEAD_INIT,
4923 "_pickle",
4924 pickle_module_doc,
4925 -1,
4926 NULL,
4927 NULL,
4928 NULL,
4929 NULL,
4930 NULL
4931};
4932
4933PyMODINIT_FUNC
4934PyInit__pickle(void)
4935{
4936 PyObject *m;
4937
4938 if (PyType_Ready(&Unpickler_Type) < 0)
4939 return NULL;
4940 if (PyType_Ready(&Pickler_Type) < 0)
4941 return NULL;
4942 if (PyType_Ready(&Pdata_Type) < 0)
4943 return NULL;
4944
4945 /* Create the module and add the functions. */
4946 m = PyModule_Create(&_picklemodule);
4947 if (m == NULL)
4948 return NULL;
4949
4950 if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
4951 return NULL;
4952 if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
4953 return NULL;
4954
4955 /* Initialize the exceptions. */
4956 PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
4957 if (PickleError == NULL)
4958 return NULL;
4959 PicklingError = \
4960 PyErr_NewException("_pickle.PicklingError", PickleError, NULL);
4961 if (PicklingError == NULL)
4962 return NULL;
4963 UnpicklingError = \
4964 PyErr_NewException("_pickle.UnpicklingError", PickleError, NULL);
4965 if (UnpicklingError == NULL)
4966 return NULL;
4967
4968 if (PyModule_AddObject(m, "PickleError", PickleError) < 0)
4969 return NULL;
4970 if (PyModule_AddObject(m, "PicklingError", PicklingError) < 0)
4971 return NULL;
4972 if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0)
4973 return NULL;
4974
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004975 if (initmodule() < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004976 return NULL;
4977
4978 return m;
4979}