blob: 96f194ecd4de4101052172cac2fbc01b2b49fc85 [file] [log] [blame]
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001#include "Python.h"
2#include "structmember.h"
3
4PyDoc_STRVAR(pickle_module_doc,
5"Optimized C implementation for the Python pickle module.");
6
7/* Bump this when new opcodes are added to the pickle protocol. */
enum {
    /* Highest pickle protocol number this module knows about. */
    HIGHEST_PROTOCOL = 3,
    /* NOTE(review): presumably the protocol chosen when the caller does not
       request one -- its use is outside this part of the file; confirm. */
    DEFAULT_PROTOCOL = 3
};
12
13
14/* Pickle opcodes. These must be kept updated with pickle.py.
15 Extensive docs are in pickletools.py. */
/* Every opcode is a single character constant; the save_*() helpers write
   it as the first byte of the record they emit. */
enum opcode {
    MARK            = '(',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',
    FLOAT           = 'F',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    LONG            = 'L',
    BININT2         = 'M',
    NONE            = 'N',
    PERSID          = 'P',
    BINPERSID       = 'Q',
    REDUCE          = 'R',
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',
    APPEND          = 'a',
    BUILD           = 'b',
    GLOBAL          = 'c',
    DICT            = 'd',
    EMPTY_DICT      = '}',
    APPENDS         = 'e',
    GET             = 'g',
    BINGET          = 'h',
    INST            = 'i',
    LONG_BINGET     = 'j',
    LIST            = 'l',
    EMPTY_LIST      = ']',
    OBJ             = 'o',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',
    SETITEM         = 's',
    TUPLE           = 't',
    EMPTY_TUPLE     = ')',
    SETITEMS        = 'u',
    BINFLOAT        = 'G',

    /* Protocol 2.  These are character constants above 0x7f, so their int
       value is negative on platforms where plain char is signed; they are
       only meaningful once stored into a char. */
    PROTO       = '\x80',
    NEWOBJ      = '\x81',
    EXT1        = '\x82',
    EXT2        = '\x83',
    EXT4        = '\x84',
    TUPLE1      = '\x85',
    TUPLE2      = '\x86',
    TUPLE3      = '\x87',
    NEWTRUE     = '\x88',
    NEWFALSE    = '\x89',
    LONG1       = '\x8a',
    LONG4       = '\x8b',

    /* Protocol 3 (Python 3.x) */
    BINBYTES       = 'B',
    SHORT_BINBYTES = 'C',
};
77
78/* These aren't opcodes -- they're ways to pickle bools before protocol 2
79 * so that unpicklers written before bools were introduced unpickle them
80 * as ints, but unpicklers after can recognize that bools were intended.
81 * Note that protocol 2 added direct ways to pickle bools.
82 */
/* Drop any earlier definition of TRUE/FALSE, then redefine them as the
   complete text-mode records (INT opcode, "01"/"00", newline) that
   save_bool() writes for protocols below 2. */
#undef TRUE
#define TRUE "I01\n"
#undef FALSE
#define FALSE "I00\n"
87
/* Tuning constants for the pickler. */
enum {
    /* Keep in sync with pickle.Pickler._BATCHSIZE.  This is how many elements
       batch_list/dict() pumps out before doing APPENDS/SETITEMS.  Nothing will
       break if this gets out of sync with pickle.py, but it's unclear that
       would help anything either. */
    BATCHSIZE = 1000,

    /* Nesting limit until Pickler, when running in "fast mode", starts
       checking for self-referential data-structures (see
       fast_save_enter()). */
    FAST_NESTING_LIMIT = 50,

    /* Size of the write buffer of Pickler.  Higher values will reduce the
       number of calls to the write() method of the output stream.
       pickler_write() sends anything larger than this straight to write()
       instead of buffering it. */
    WRITE_BUF_SIZE = 256,
};
103
104/* Exception classes for pickle. These should override the ones defined in
105 pickle.py, when the C-optimized Pickler and Unpickler are used. */
static PyObject *PickleError = NULL;
static PyObject *PicklingError = NULL;
static PyObject *UnpicklingError = NULL;

/* copyreg.dispatch_table, {type_object: pickling_function} */
static PyObject *dispatch_table = NULL;
/* For EXT[124] opcodes. */
/* copyreg._extension_registry, {(module_name, function_name): code} */
static PyObject *extension_registry = NULL;
/* copyreg._inverted_registry, {code: (module_name, function_name)} */
static PyObject *inverted_registry = NULL;
/* copyreg._extension_cache, {code: object} */
static PyObject *extension_cache = NULL;

/* _compat_pickle.NAME_MAPPING, {(oldmodule, oldname): (newmodule, newname)} */
static PyObject *name_mapping_2to3 = NULL;
/* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
static PyObject *import_mapping_2to3 = NULL;
/* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
static PyObject *name_mapping_3to2 = NULL;
static PyObject *import_mapping_3to2 = NULL;

/* XXX: Are these really necessary? */
/* As the name says, an empty tuple.  unpickler_readline() passes it as the
   argument tuple when calling readline(). */
static PyObject *empty_tuple = NULL;
/* For looking up name pairs in copyreg._extension_registry. */
static PyObject *two_tuple = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000133
/* Raise UnpicklingError("unpickling stack underflow").  Always returns -1 so
   callers can write `return stack_underflow();`. */
static int
stack_underflow(void)
{
    PyErr_SetString(UnpicklingError, "unpickling stack underflow");
    return -1;
}
140
141/* Internal data type used as the unpickling stack. */
/* A growable array of object pointers used as a stack: slots
   data[0 .. length-1] are live, data[length .. size-1] are spare capacity. */
typedef struct {
    PyObject_HEAD
    int length;         /* number of initial slots in data currently used */
    int size;           /* number of slots in data allocated */
    PyObject **data;    /* slot array, allocated with PyMem_Malloc/Realloc */
} Pdata;
148
149static void
150Pdata_dealloc(Pdata *self)
151{
152 int i;
153 PyObject **p;
154
155 for (i = self->length, p = self->data; --i >= 0; p++) {
156 Py_DECREF(*p);
157 }
158 if (self->data)
159 PyMem_Free(self->data);
160 PyObject_Del(self);
161}
162
/* Type object for Pdata.  Only the name, the instance size and the
   deallocator are given; all remaining slots are left zero-initialized. */
static PyTypeObject Pdata_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Pdata",              /*tp_name*/
    sizeof(Pdata),                /*tp_basicsize*/
    0,                            /*tp_itemsize*/
    (destructor)Pdata_dealloc,    /*tp_dealloc*/
};
170
171static PyObject *
172Pdata_New(void)
173{
174 Pdata *self;
175
176 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
177 return NULL;
178 self->size = 8;
179 self->length = 0;
180 self->data = PyMem_Malloc(self->size * sizeof(PyObject *));
181 if (self->data)
182 return (PyObject *)self;
183 Py_DECREF(self);
184 return PyErr_NoMemory();
185}
186
187
188/* Retain only the initial clearto items. If clearto >= the current
189 * number of items, this is a (non-erroneous) NOP.
190 */
191static int
192Pdata_clear(Pdata *self, int clearto)
193{
194 int i;
195 PyObject **p;
196
197 if (clearto < 0)
198 return stack_underflow();
199 if (clearto >= self->length)
200 return 0;
201
202 for (i = self->length, p = self->data + clearto; --i >= clearto; p++) {
203 Py_CLEAR(*p);
204 }
205 self->length = clearto;
206
207 return 0;
208}
209
210static int
211Pdata_grow(Pdata *self)
212{
213 int bigger;
214 size_t nbytes;
215 PyObject **tmp;
216
217 bigger = (self->size << 1) + 1;
218 if (bigger <= 0) /* was 0, or new value overflows */
219 goto nomemory;
220 if ((int)(size_t)bigger != bigger)
221 goto nomemory;
222 nbytes = (size_t)bigger * sizeof(PyObject *);
223 if (nbytes / sizeof(PyObject *) != (size_t)bigger)
224 goto nomemory;
225 tmp = PyMem_Realloc(self->data, nbytes);
226 if (tmp == NULL)
227 goto nomemory;
228 self->data = tmp;
229 self->size = bigger;
230 return 0;
231
232 nomemory:
233 PyErr_NoMemory();
234 return -1;
235}
236
237/* D is a Pdata*. Pop the topmost element and store it into V, which
238 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
239 * is raised and V is set to NULL.
240 */
241static PyObject *
242Pdata_pop(Pdata *self)
243{
244 if (self->length == 0) {
245 PyErr_SetString(UnpicklingError, "bad pickle data");
246 return NULL;
247 }
248 return self->data[--(self->length)];
249}
250#define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
251
252static int
253Pdata_push(Pdata *self, PyObject *obj)
254{
255 if (self->length == self->size && Pdata_grow(self) < 0) {
256 return -1;
257 }
258 self->data[self->length++] = obj;
259 return 0;
260}
261
/* Push an object on stack, transferring its ownership to the stack.
   On failure the enclosing function returns ER.  Note that Pdata_push() does
   not release O in that case. */
#define PDATA_PUSH(D, O, ER) do {                               \
        if (Pdata_push((D), (O)) < 0) return (ER); } while(0)

/* Push an object on stack, adding a new reference to the object.
   On failure the reference added here is dropped again before the enclosing
   function returns ER, so the caller's own reference is all that remains.
   O is evaluated more than once and must be free of side effects. */
#define PDATA_APPEND(D, O, ER) do {                             \
        Py_INCREF((O));                                         \
        if (Pdata_push((D), (O)) < 0) {                         \
            Py_DECREF((O));                                     \
            return (ER);                                        \
        }                                                       \
    } while(0)
270
271static PyObject *
272Pdata_poptuple(Pdata *self, Py_ssize_t start)
273{
274 PyObject *tuple;
275 Py_ssize_t len, i, j;
276
277 len = self->length - start;
278 tuple = PyTuple_New(len);
279 if (tuple == NULL)
280 return NULL;
281 for (i = start, j = 0; j < len; i++, j++)
282 PyTuple_SET_ITEM(tuple, j, self->data[i]);
283
284 self->length = start;
285 return tuple;
286}
287
288static PyObject *
289Pdata_poplist(Pdata *self, Py_ssize_t start)
290{
291 PyObject *list;
292 Py_ssize_t len, i, j;
293
294 len = self->length - start;
295 list = PyList_New(len);
296 if (list == NULL)
297 return NULL;
298 for (i = start, j = 0; j < len; i++, j++)
299 PyList_SET_ITEM(list, j, self->data[i]);
300
301 self->length = start;
302 return list;
303}
304
/* State of one Pickler instance. */
typedef struct PicklerObject {
    PyObject_HEAD
    PyObject *write;            /* write() method of the output stream */
    PyObject *memo;             /* Memo dictionary, keeps track of the seen
                                   objects to support pickling of
                                   self-referential objects. */
    PyObject *pers_func;        /* persistent_id() method, can be NULL */
    PyObject *arg;              /* Cached one-element argument tuple, managed
                                   by ARG_TUP()/FREE_ARG_TUP(); may be NULL */
    int proto;                  /* Pickle protocol number, >= 0 */
    int bin;                    /* Boolean, true if proto > 0 */
    int buf_size;               /* Size of the current buffered pickle data */
    char *write_buf;            /* Write buffer, this is to avoid calling the
                                   write() method of the output stream too
                                   often. */
    int fast;                   /* Enable fast mode if set to a true value.
                                   The fast mode disables the usage of memo,
                                   therefore speeding up the pickling process
                                   by not generating superfluous PUT opcodes.
                                   It should not be used with
                                   self-referential objects. */
    int fast_nesting;           /* Current nesting depth in fast mode; set to
                                   -1 by fast_save_enter() on error */
    int fix_imports;            /* Indicate whether Pickler should fix
                                   the name of globals for Python 2.x. */
    PyObject *fast_memo;        /* Dict created lazily by fast_save_enter(),
                                   keyed by object address */
} PicklerObject;
330
/* State of one Unpickler instance. */
typedef struct UnpicklerObject {
    PyObject_HEAD
    Pdata *stack;               /* Pickle data stack, store unpickled objects. */
    PyObject *readline;         /* readline() method of the underlying stream */
    PyObject *read;             /* read() method of the underlying stream */
    PyObject *memo;             /* Memo dictionary, provide the objects stored
                                   using the PUT opcodes. */
    PyObject *arg;              /* Cached one-element argument tuple, managed
                                   by ARG_TUP()/FREE_ARG_TUP(); may be NULL */
    PyObject *pers_func;        /* persistent_load() method, can be NULL. */
    PyObject *last_string;      /* Reference to the last bytes object returned
                                   by read() or readline(); keeps the memory
                                   handed out by unpickler_read*() alive. */
    char *buffer;               /* Reading buffer. */
    char *encoding;             /* Name of the encoding to be used for
                                   decoding strings pickled using Python
                                   2.x. The default value is "ASCII" */
    char *errors;               /* Name of the error handling scheme to use
                                   when decoding strings. The default value is
                                   "strict". */
    int *marks;                 /* Mark stack, used for unpickling container
                                   objects. */
    Py_ssize_t num_marks;       /* Number of marks in the mark stack. */
    Py_ssize_t marks_size;      /* Current allocated size of the mark stack. */
    int proto;                  /* Protocol of the pickle loaded. */
    int fix_imports;            /* Indicate whether Unpickler should fix
                                   the name of globals pickled by Python 2.x. */
} UnpicklerObject;
357
358/* Forward declarations */
359static int save(PicklerObject *, PyObject *, int);
360static int save_reduce(PicklerObject *, PyObject *, PyObject *);
361static PyTypeObject Pickler_Type;
362static PyTypeObject Unpickler_Type;
363
364
365/* Helpers for creating the argument tuple passed to functions. This has the
366 performance advantage of calling PyTuple_New() only once. */
367
/* Store `obj` as the single item of self->arg, creating the tuple on first
   use and releasing whatever item it held before.  The reference to `obj` is
   consumed either way: it is stolen by the tuple, or dropped when the tuple
   cannot be allocated (self->arg is then left NULL for the caller to test). */
#define ARG_TUP(self, obj) do {                             \
        if ((self)->arg || ((self)->arg=PyTuple_New(1))) {  \
            Py_XDECREF(PyTuple_GET_ITEM((self)->arg, 0));   \
            PyTuple_SET_ITEM((self)->arg, 0, (obj));        \
        }                                                   \
        else {                                              \
            Py_DECREF((obj));                               \
        }                                                   \
    } while (0)

/* After a call: if something else now holds a reference to the cached tuple,
   let go of it so the next ARG_TUP() allocates a fresh one.  Otherwise the
   tuple (and the argument stored in it) stays cached.  self->arg must not be
   NULL here. */
#define FREE_ARG_TUP(self) do {                 \
        if ((self)->arg->ob_refcnt > 1)         \
            Py_CLEAR((self)->arg);              \
    } while (0)
382
383/* A temporary cleaner API for fast single argument function call.
384
385 XXX: Does caching the argument tuple provides any real performance benefits?
386
387 A quick benchmark, on a 2.0GHz Athlon64 3200+ running Linux 2.6.24 with
388 glibc 2.7, tells me that it takes roughly 20,000,000 PyTuple_New(1) calls
389 when the tuple is retrieved from the freelist (i.e, call PyTuple_New() then
390 immediately DECREF it) and 1,200,000 calls when allocating brand new tuples
391 (i.e, call PyTuple_New() and store the returned value in an array), to save
392 one second (wall clock time). Either ways, the loading time a pickle stream
393 large enough to generate this number of calls would be massively
394 overwhelmed by other factors, like I/O throughput, the GC traversal and
395 object allocation overhead. So, I really doubt these functions provide any
396 real benefits.
397
398 On the other hand, oprofile reports that pickle spends a lot of time in
399 these functions. But, that is probably more related to the function call
400 overhead, than the argument tuple allocation.
401
402 XXX: And, what is the reference behavior of these? Steal, borrow? At first
403 glance, it seems to steal the reference of 'arg' and borrow the reference
404 of 'func'.
405 */
406static PyObject *
407pickler_call(PicklerObject *self, PyObject *func, PyObject *arg)
408{
409 PyObject *result = NULL;
410
411 ARG_TUP(self, arg);
412 if (self->arg) {
413 result = PyObject_Call(func, self->arg, NULL);
414 FREE_ARG_TUP(self);
415 }
416 return result;
417}
418
419static PyObject *
420unpickler_call(UnpicklerObject *self, PyObject *func, PyObject *arg)
421{
422 PyObject *result = NULL;
423
424 ARG_TUP(self, arg);
425 if (self->arg) {
426 result = PyObject_Call(func, self->arg, NULL);
427 FREE_ARG_TUP(self);
428 }
429 return result;
430}
431
432static Py_ssize_t
433pickler_write(PicklerObject *self, const char *s, Py_ssize_t n)
434{
435 PyObject *data, *result;
436
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000437 if (self->write_buf == NULL) {
438 PyErr_SetString(PyExc_SystemError, "invalid write buffer");
439 return -1;
440 }
441
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000442 if (s == NULL) {
443 if (!(self->buf_size))
444 return 0;
445 data = PyBytes_FromStringAndSize(self->write_buf, self->buf_size);
446 if (data == NULL)
447 return -1;
448 }
449 else {
450 if (self->buf_size && (n + self->buf_size) > WRITE_BUF_SIZE) {
451 if (pickler_write(self, NULL, 0) < 0)
452 return -1;
453 }
454
455 if (n > WRITE_BUF_SIZE) {
456 if (!(data = PyBytes_FromStringAndSize(s, n)))
457 return -1;
458 }
459 else {
460 memcpy(self->write_buf + self->buf_size, s, n);
461 self->buf_size += n;
462 return n;
463 }
464 }
465
466 /* object with write method */
467 result = pickler_call(self, self->write, data);
468 if (result == NULL)
469 return -1;
470
471 Py_DECREF(result);
472 self->buf_size = 0;
473 return n;
474}
475
/* XXX: These read/readline functions ought to be optimized. Buffered I/O
   might help a lot, especially with the new (but much slower) io library.
   On the other hand, the added complexity might not be worth it.
 */
480
481/* Read at least n characters from the input stream and set s to the current
482 reading position. */
483static Py_ssize_t
484unpickler_read(UnpicklerObject *self, char **s, Py_ssize_t n)
485{
486 PyObject *len;
487 PyObject *data;
488
489 len = PyLong_FromSsize_t(n);
490 if (len == NULL)
491 return -1;
492
493 data = unpickler_call(self, self->read, len);
494 if (data == NULL)
495 return -1;
496
497 /* XXX: Should bytearray be supported too? */
498 if (!PyBytes_Check(data)) {
499 PyErr_SetString(PyExc_ValueError,
500 "read() from the underlying stream did not"
501 "return bytes");
Amaury Forgeot d'Arc5f952572008-11-25 21:11:54 +0000502 Py_DECREF(data);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000503 return -1;
504 }
505
Amaury Forgeot d'Arc3e4e72f2008-11-11 20:05:06 +0000506 if (PyBytes_GET_SIZE(data) != n) {
507 PyErr_SetNone(PyExc_EOFError);
Amaury Forgeot d'Arc5f952572008-11-25 21:11:54 +0000508 Py_DECREF(data);
Amaury Forgeot d'Arc3e4e72f2008-11-11 20:05:06 +0000509 return -1;
510 }
511
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000512 Py_XDECREF(self->last_string);
513 self->last_string = data;
514
515 if (!(*s = PyBytes_AS_STRING(data)))
516 return -1;
517
518 return n;
519}
520
521static Py_ssize_t
522unpickler_readline(UnpicklerObject *self, char **s)
523{
524 PyObject *data;
525
526 data = PyObject_CallObject(self->readline, empty_tuple);
527 if (data == NULL)
528 return -1;
529
530 /* XXX: Should bytearray be supported too? */
531 if (!PyBytes_Check(data)) {
532 PyErr_SetString(PyExc_ValueError,
533 "readline() from the underlying stream did not"
534 "return bytes");
535 return -1;
536 }
537
538 Py_XDECREF(self->last_string);
539 self->last_string = data;
540
541 if (!(*s = PyBytes_AS_STRING(data)))
542 return -1;
543
544 return PyBytes_GET_SIZE(data);
545}
546
547/* Generate a GET opcode for an object stored in the memo. The 'key' argument
548 should be the address of the object as returned by PyLong_FromVoidPtr(). */
549static int
550memo_get(PicklerObject *self, PyObject *key)
551{
552 PyObject *value;
553 PyObject *memo_id;
554 long x;
555 char pdata[30];
556 int len;
557
558 value = PyDict_GetItemWithError(self->memo, key);
559 if (value == NULL) {
560 if (!PyErr_Occurred())
561 PyErr_SetObject(PyExc_KeyError, key);
562 return -1;
563 }
564
565 memo_id = PyTuple_GetItem(value, 0);
566 if (memo_id == NULL)
567 return -1;
568
569 if (!PyLong_Check(memo_id)) {
570 PyErr_SetString(PicklingError, "memo id must be an integer");
571 return -1;
572 }
573 x = PyLong_AsLong(memo_id);
574 if (x == -1 && PyErr_Occurred())
575 return -1;
576
577 if (!self->bin) {
578 pdata[0] = GET;
579 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
580 len = (int)strlen(pdata);
581 }
582 else {
583 if (x < 256) {
584 pdata[0] = BINGET;
585 pdata[1] = (unsigned char)(x & 0xff);
586 len = 2;
587 }
588 else if (x <= 0xffffffffL) {
589 pdata[0] = LONG_BINGET;
590 pdata[1] = (unsigned char)(x & 0xff);
591 pdata[2] = (unsigned char)((x >> 8) & 0xff);
592 pdata[3] = (unsigned char)((x >> 16) & 0xff);
593 pdata[4] = (unsigned char)((x >> 24) & 0xff);
594 len = 5;
595 }
596 else { /* unlikely */
597 PyErr_SetString(PicklingError,
598 "memo id too large for LONG_BINGET");
599 return -1;
600 }
601 }
602
603 if (pickler_write(self, pdata, len) < 0)
604 return -1;
605
606 return 0;
607}
608
609/* Store an object in the memo, assign it a new unique ID based on the number
610 of objects currently stored in the memo and generate a PUT opcode. */
611static int
612memo_put(PicklerObject *self, PyObject *obj)
613{
614 PyObject *key = NULL;
615 PyObject *memo_id = NULL;
616 PyObject *tuple = NULL;
617 long x;
618 char pdata[30];
619 int len;
620 int status = 0;
621
622 if (self->fast)
623 return 0;
624
625 key = PyLong_FromVoidPtr(obj);
626 if (key == NULL)
627 goto error;
628 if ((x = PyDict_Size(self->memo)) < 0)
629 goto error;
630 memo_id = PyLong_FromLong(x);
631 if (memo_id == NULL)
632 goto error;
633 tuple = PyTuple_New(2);
634 if (tuple == NULL)
635 goto error;
636
637 Py_INCREF(memo_id);
638 PyTuple_SET_ITEM(tuple, 0, memo_id);
639 Py_INCREF(obj);
640 PyTuple_SET_ITEM(tuple, 1, obj);
641 if (PyDict_SetItem(self->memo, key, tuple) < 0)
642 goto error;
643
644 if (!self->bin) {
645 pdata[0] = PUT;
646 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
647 len = strlen(pdata);
648 }
649 else {
650 if (x < 256) {
651 pdata[0] = BINPUT;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +0000652 pdata[1] = (unsigned char)x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000653 len = 2;
654 }
655 else if (x <= 0xffffffffL) {
656 pdata[0] = LONG_BINPUT;
657 pdata[1] = (unsigned char)(x & 0xff);
658 pdata[2] = (unsigned char)((x >> 8) & 0xff);
659 pdata[3] = (unsigned char)((x >> 16) & 0xff);
660 pdata[4] = (unsigned char)((x >> 24) & 0xff);
661 len = 5;
662 }
663 else { /* unlikely */
664 PyErr_SetString(PicklingError,
665 "memo id too large for LONG_BINPUT");
666 return -1;
667 }
668 }
669
670 if (pickler_write(self, pdata, len) < 0)
671 goto error;
672
673 if (0) {
674 error:
675 status = -1;
676 }
677
678 Py_XDECREF(key);
679 Py_XDECREF(memo_id);
680 Py_XDECREF(tuple);
681
682 return status;
683}
684
685static PyObject *
686whichmodule(PyObject *global, PyObject *global_name)
687{
688 Py_ssize_t i, j;
689 static PyObject *module_str = NULL;
690 static PyObject *main_str = NULL;
691 PyObject *module_name;
692 PyObject *modules_dict;
693 PyObject *module;
694 PyObject *obj;
695
696 if (module_str == NULL) {
697 module_str = PyUnicode_InternFromString("__module__");
698 if (module_str == NULL)
699 return NULL;
700 main_str = PyUnicode_InternFromString("__main__");
701 if (main_str == NULL)
702 return NULL;
703 }
704
705 module_name = PyObject_GetAttr(global, module_str);
706
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +0000707 /* In some rare cases (e.g., bound methods of extension types),
708 __module__ can be None. If it is so, then search sys.modules
709 for the module of global. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000710 if (module_name == Py_None) {
711 Py_DECREF(module_name);
712 goto search;
713 }
714
715 if (module_name) {
716 return module_name;
717 }
718 if (PyErr_ExceptionMatches(PyExc_AttributeError))
719 PyErr_Clear();
720 else
721 return NULL;
722
723 search:
724 modules_dict = PySys_GetObject("modules");
725 if (modules_dict == NULL)
726 return NULL;
727
728 i = 0;
729 module_name = NULL;
730 while ((j = PyDict_Next(modules_dict, &i, &module_name, &module))) {
Mark Dickinson211c6252009-02-01 10:28:51 +0000731 if (PyObject_RichCompareBool(module_name, main_str, Py_EQ) == 1)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000732 continue;
733
734 obj = PyObject_GetAttr(module, global_name);
735 if (obj == NULL) {
736 if (PyErr_ExceptionMatches(PyExc_AttributeError))
737 PyErr_Clear();
738 else
739 return NULL;
740 continue;
741 }
742
743 if (obj != global) {
744 Py_DECREF(obj);
745 continue;
746 }
747
748 Py_DECREF(obj);
749 break;
750 }
751
752 /* If no module is found, use __main__. */
753 if (!j) {
754 module_name = main_str;
755 }
756
757 Py_INCREF(module_name);
758 return module_name;
759}
760
761/* fast_save_enter() and fast_save_leave() are guards against recursive
762 objects when Pickler is used with the "fast mode" (i.e., with object
763 memoization disabled). If the nesting of a list or dict object exceed
764 FAST_NESTING_LIMIT, these guards will start keeping an internal
765 reference to the seen list or dict objects and check whether these objects
766 are recursive. These are not strictly necessary, since save() has a
767 hard-coded recursion limit, but they give a nicer error message than the
768 typical RuntimeError. */
769static int
770fast_save_enter(PicklerObject *self, PyObject *obj)
771{
772 /* if fast_nesting < 0, we're doing an error exit. */
773 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
774 PyObject *key = NULL;
775 if (self->fast_memo == NULL) {
776 self->fast_memo = PyDict_New();
777 if (self->fast_memo == NULL) {
778 self->fast_nesting = -1;
779 return 0;
780 }
781 }
782 key = PyLong_FromVoidPtr(obj);
783 if (key == NULL)
784 return 0;
785 if (PyDict_GetItem(self->fast_memo, key)) {
786 Py_DECREF(key);
787 PyErr_Format(PyExc_ValueError,
788 "fast mode: can't pickle cyclic objects "
789 "including object type %.200s at %p",
790 obj->ob_type->tp_name, obj);
791 self->fast_nesting = -1;
792 return 0;
793 }
794 if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
795 Py_DECREF(key);
796 self->fast_nesting = -1;
797 return 0;
798 }
799 Py_DECREF(key);
800 }
801 return 1;
802}
803
804static int
805fast_save_leave(PicklerObject *self, PyObject *obj)
806{
807 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
808 PyObject *key = PyLong_FromVoidPtr(obj);
809 if (key == NULL)
810 return 0;
811 if (PyDict_DelItem(self->fast_memo, key) < 0) {
812 Py_DECREF(key);
813 return 0;
814 }
815 Py_DECREF(key);
816 }
817 return 1;
818}
819
820static int
821save_none(PicklerObject *self, PyObject *obj)
822{
823 const char none_op = NONE;
824 if (pickler_write(self, &none_op, 1) < 0)
825 return -1;
826
827 return 0;
828}
829
830static int
831save_bool(PicklerObject *self, PyObject *obj)
832{
833 static const char *buf[2] = { FALSE, TRUE };
834 const char len[2] = {sizeof(FALSE) - 1, sizeof(TRUE) - 1};
835 int p = (obj == Py_True);
836
837 if (self->proto >= 2) {
838 const char bool_op = p ? NEWTRUE : NEWFALSE;
839 if (pickler_write(self, &bool_op, 1) < 0)
840 return -1;
841 }
842 else if (pickler_write(self, buf[p], len[p]) < 0)
843 return -1;
844
845 return 0;
846}
847
848static int
849save_int(PicklerObject *self, long x)
850{
851 char pdata[32];
852 int len = 0;
853
854 if (!self->bin
855#if SIZEOF_LONG > 4
856 || x > 0x7fffffffL || x < -0x80000000L
857#endif
858 ) {
859 /* Text-mode pickle, or long too big to fit in the 4-byte
860 * signed BININT format: store as a string.
861 */
Mark Dickinson8dd05142009-01-20 20:43:58 +0000862 pdata[0] = LONG; /* use LONG for consistency with pickle.py */
863 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ldL\n", x);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000864 if (pickler_write(self, pdata, strlen(pdata)) < 0)
865 return -1;
866 }
867 else {
868 /* Binary pickle and x fits in a signed 4-byte int. */
869 pdata[1] = (unsigned char)(x & 0xff);
870 pdata[2] = (unsigned char)((x >> 8) & 0xff);
871 pdata[3] = (unsigned char)((x >> 16) & 0xff);
872 pdata[4] = (unsigned char)((x >> 24) & 0xff);
873
874 if ((pdata[4] == 0) && (pdata[3] == 0)) {
875 if (pdata[2] == 0) {
876 pdata[0] = BININT1;
877 len = 2;
878 }
879 else {
880 pdata[0] = BININT2;
881 len = 3;
882 }
883 }
884 else {
885 pdata[0] = BININT;
886 len = 5;
887 }
888
889 if (pickler_write(self, pdata, len) < 0)
890 return -1;
891 }
892
893 return 0;
894}
895
/* Pickle a Python int of arbitrary size.

   Values that fit a C long are delegated to save_int().  Larger values are
   written as LONG1/LONG4 + little-endian two's-complement bytes for
   protocol >= 2, or as LONG + repr + "L\n" for older protocols.
   Returns 0 on success, -1 with an exception set on failure. */
static int
save_long(PicklerObject *self, PyObject *obj)
{
    PyObject *repr = NULL;
    Py_ssize_t size;
    long val = PyLong_AsLong(obj);
    int status = 0;

    const char long_op = LONG;

    if (val == -1 && PyErr_Occurred()) {
        /* out of range for int pickling */
        PyErr_Clear();
    }
    else
        return save_int(self, val);

    if (self->proto >= 2) {
        /* Linear-time pickling. */
        size_t nbits;
        size_t nbytes;
        unsigned char *pdata;
        char header[5];
        int i;
        int sign = _PyLong_Sign(obj);

        if (sign == 0) {
            header[0] = LONG1;
            header[1] = 0;      /* It's 0 -- an empty bytestring. */
            if (pickler_write(self, header, 2) < 0)
                goto error;
            return 0;
        }
        nbits = _PyLong_NumBits(obj);
        if (nbits == (size_t)-1 && PyErr_Occurred())
            goto error;
        /* How many bytes do we need?  There are nbits >> 3 full
         * bytes of data, and nbits & 7 leftover bits.  If there
         * are any leftover bits, then we clearly need another
         * byte.  What's not so obvious is that we *probably*
         * need another byte even if there aren't any leftovers:
         * the most-significant bit of the most-significant byte
         * acts like a sign bit, and it's usually got a sense
         * opposite of the one we need.  The exception is longs
         * of the form -(2**(8*j-1)) for j > 0.  Such a long is
         * its own 256's-complement, so has the right sign bit
         * even without the extra byte.  That's a pain to check
         * for in advance, though, so we always grab an extra
         * byte at the start, and cut it back later if possible.
         */
        nbytes = (nbits >> 3) + 1;
        if (nbytes > INT_MAX) {
            PyErr_SetString(PyExc_OverflowError,
                            "long too large to pickle");
            goto error;
        }
        /* The bytes object is only used as a scratch buffer that is
           released on every exit path through `repr`. */
        repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
        if (repr == NULL)
            goto error;
        pdata = (unsigned char *)PyBytes_AS_STRING(repr);
        i = _PyLong_AsByteArray((PyLongObject *)obj,
                                pdata, nbytes,
                                1 /* little endian */ , 1 /* signed */ );
        if (i < 0)
            goto error;
        /* If the long is negative, this may be a byte more than
         * needed.  This is so iff the MSB is all redundant sign
         * bits.
         */
        if (sign < 0 &&
            nbytes > 1 &&
            pdata[nbytes - 1] == 0xff &&
            (pdata[nbytes - 2] & 0x80) != 0) {
            nbytes--;
        }

        if (nbytes < 256) {
            header[0] = LONG1;
            header[1] = (unsigned char)nbytes;
            size = 2;
        }
        else {
            header[0] = LONG4;
            /* `size` temporarily holds the byte count while it is split
               into four little-endian header bytes. */
            size = (int)nbytes;
            for (i = 1; i < 5; i++) {
                header[i] = (unsigned char)(size & 0xff);
                size >>= 8;
            }
            size = 5;
        }
        if (pickler_write(self, header, size) < 0 ||
            pickler_write(self, (char *)pdata, (int)nbytes) < 0)
            goto error;
    }
    else {
        char *string;

        /* proto < 2: write the repr and newline.  This is quadratic-time (in
           the number of digits), in both directions.  We add a trailing 'L'
           to the repr, for compatibility with Python 2.x. */

        repr = PyObject_Repr(obj);
        if (repr == NULL)
            goto error;

        string = _PyUnicode_AsStringAndSize(repr, &size);
        if (string == NULL)
            goto error;

        if (pickler_write(self, &long_op, 1) < 0 ||
            pickler_write(self, string, size) < 0 ||
            pickler_write(self, "L\n", 2) < 0)
            goto error;
    }

    /* The error label sits inside a never-taken branch so that the success
       path falls through to the shared cleanup with status still 0. */
    if (0) {
  error:
      status = -1;
    }
    Py_XDECREF(repr);

    return status;
}
1019
1020static int
1021save_float(PicklerObject *self, PyObject *obj)
1022{
1023 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
1024
1025 if (self->bin) {
1026 char pdata[9];
1027 pdata[0] = BINFLOAT;
1028 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
1029 return -1;
1030 if (pickler_write(self, pdata, 9) < 0)
1031 return -1;
Eric Smith0923d1d2009-04-16 20:16:10 +00001032 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001033 else {
Eric Smith0923d1d2009-04-16 20:16:10 +00001034 int result = -1;
1035 char *buf = NULL;
1036 char op = FLOAT;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001037
Eric Smith0923d1d2009-04-16 20:16:10 +00001038 if (pickler_write(self, &op, 1) < 0)
1039 goto done;
1040
Mark Dickinson3e09f432009-04-17 08:41:23 +00001041 buf = PyOS_double_to_string(x, 'g', 17, 0, NULL);
Eric Smith0923d1d2009-04-16 20:16:10 +00001042 if (!buf) {
1043 PyErr_NoMemory();
1044 goto done;
1045 }
1046
1047 if (pickler_write(self, buf, strlen(buf)) < 0)
1048 goto done;
1049
1050 if (pickler_write(self, "\n", 1) < 0)
1051 goto done;
1052
1053 result = 0;
1054done:
1055 PyMem_Free(buf);
1056 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001057 }
1058
1059 return 0;
1060}
1061
1062static int
1063save_bytes(PicklerObject *self, PyObject *obj)
1064{
1065 if (self->proto < 3) {
1066 /* Older pickle protocols do not have an opcode for pickling bytes
1067 objects. Therefore, we need to fake the copy protocol (i.e.,
1068 the __reduce__ method) to permit bytes object unpickling. */
1069 PyObject *reduce_value = NULL;
1070 PyObject *bytelist = NULL;
1071 int status;
1072
1073 bytelist = PySequence_List(obj);
1074 if (bytelist == NULL)
1075 return -1;
1076
1077 reduce_value = Py_BuildValue("(O(O))", (PyObject *)&PyBytes_Type,
1078 bytelist);
1079 if (reduce_value == NULL) {
1080 Py_DECREF(bytelist);
1081 return -1;
1082 }
1083
1084 /* save_reduce() will memoize the object automatically. */
1085 status = save_reduce(self, reduce_value, obj);
1086 Py_DECREF(reduce_value);
1087 Py_DECREF(bytelist);
1088 return status;
1089 }
1090 else {
1091 Py_ssize_t size;
1092 char header[5];
1093 int len;
1094
1095 size = PyBytes_Size(obj);
1096 if (size < 0)
1097 return -1;
1098
1099 if (size < 256) {
1100 header[0] = SHORT_BINBYTES;
1101 header[1] = (unsigned char)size;
1102 len = 2;
1103 }
1104 else if (size <= 0xffffffffL) {
1105 header[0] = BINBYTES;
1106 header[1] = (unsigned char)(size & 0xff);
1107 header[2] = (unsigned char)((size >> 8) & 0xff);
1108 header[3] = (unsigned char)((size >> 16) & 0xff);
1109 header[4] = (unsigned char)((size >> 24) & 0xff);
1110 len = 5;
1111 }
1112 else {
1113 return -1; /* string too large */
1114 }
1115
1116 if (pickler_write(self, header, len) < 0)
1117 return -1;
1118
1119 if (pickler_write(self, PyBytes_AS_STRING(obj), size) < 0)
1120 return -1;
1121
1122 if (memo_put(self, obj) < 0)
1123 return -1;
1124
1125 return 0;
1126 }
1127}
1128
1129/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
1130 backslash and newline characters to \uXXXX escapes. */
1131static PyObject *
1132raw_unicode_escape(const Py_UNICODE *s, Py_ssize_t size)
1133{
1134 PyObject *repr, *result;
1135 char *p;
1136 char *q;
1137
1138 static const char *hexdigits = "0123456789abcdef";
1139
1140#ifdef Py_UNICODE_WIDE
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001141 const Py_ssize_t expandsize = 10;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001142#else
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001143 const Py_ssize_t expandsize = 6;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001144#endif
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001145
1146 if (size > PY_SSIZE_T_MAX / expandsize)
1147 return PyErr_NoMemory();
1148
1149 repr = PyByteArray_FromStringAndSize(NULL, expandsize * size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001150 if (repr == NULL)
1151 return NULL;
1152 if (size == 0)
1153 goto done;
1154
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001155 p = q = PyByteArray_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001156 while (size-- > 0) {
1157 Py_UNICODE ch = *s++;
1158#ifdef Py_UNICODE_WIDE
1159 /* Map 32-bit characters to '\Uxxxxxxxx' */
1160 if (ch >= 0x10000) {
1161 *p++ = '\\';
1162 *p++ = 'U';
1163 *p++ = hexdigits[(ch >> 28) & 0xf];
1164 *p++ = hexdigits[(ch >> 24) & 0xf];
1165 *p++ = hexdigits[(ch >> 20) & 0xf];
1166 *p++ = hexdigits[(ch >> 16) & 0xf];
1167 *p++ = hexdigits[(ch >> 12) & 0xf];
1168 *p++ = hexdigits[(ch >> 8) & 0xf];
1169 *p++ = hexdigits[(ch >> 4) & 0xf];
1170 *p++ = hexdigits[ch & 15];
1171 }
1172 else
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001173#else
1174 /* Map UTF-16 surrogate pairs to '\U00xxxxxx' */
1175 if (ch >= 0xD800 && ch < 0xDC00) {
1176 Py_UNICODE ch2;
1177 Py_UCS4 ucs;
1178
1179 ch2 = *s++;
1180 size--;
1181 if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
1182 ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
1183 *p++ = '\\';
1184 *p++ = 'U';
1185 *p++ = hexdigits[(ucs >> 28) & 0xf];
1186 *p++ = hexdigits[(ucs >> 24) & 0xf];
1187 *p++ = hexdigits[(ucs >> 20) & 0xf];
1188 *p++ = hexdigits[(ucs >> 16) & 0xf];
1189 *p++ = hexdigits[(ucs >> 12) & 0xf];
1190 *p++ = hexdigits[(ucs >> 8) & 0xf];
1191 *p++ = hexdigits[(ucs >> 4) & 0xf];
1192 *p++ = hexdigits[ucs & 0xf];
1193 continue;
1194 }
1195 /* Fall through: isolated surrogates are copied as-is */
1196 s--;
1197 size++;
1198 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001199#endif
1200 /* Map 16-bit characters to '\uxxxx' */
1201 if (ch >= 256 || ch == '\\' || ch == '\n') {
1202 *p++ = '\\';
1203 *p++ = 'u';
1204 *p++ = hexdigits[(ch >> 12) & 0xf];
1205 *p++ = hexdigits[(ch >> 8) & 0xf];
1206 *p++ = hexdigits[(ch >> 4) & 0xf];
1207 *p++ = hexdigits[ch & 15];
1208 }
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001209 /* Copy everything else as-is */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001210 else
1211 *p++ = (char) ch;
1212 }
1213 size = p - q;
1214
1215 done:
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001216 result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(repr), size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001217 Py_DECREF(repr);
1218 return result;
1219}
1220
1221static int
1222save_unicode(PicklerObject *self, PyObject *obj)
1223{
1224 Py_ssize_t size;
1225 PyObject *encoded = NULL;
1226
1227 if (self->bin) {
1228 char pdata[5];
1229
Victor Stinner485fb562010-04-13 11:07:24 +00001230 encoded = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(obj),
1231 PyUnicode_GET_SIZE(obj),
1232 "surrogatepass");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001233 if (encoded == NULL)
1234 goto error;
1235
1236 size = PyBytes_GET_SIZE(encoded);
1237 if (size < 0 || size > 0xffffffffL)
1238 goto error; /* string too large */
1239
1240 pdata[0] = BINUNICODE;
1241 pdata[1] = (unsigned char)(size & 0xff);
1242 pdata[2] = (unsigned char)((size >> 8) & 0xff);
1243 pdata[3] = (unsigned char)((size >> 16) & 0xff);
1244 pdata[4] = (unsigned char)((size >> 24) & 0xff);
1245
1246 if (pickler_write(self, pdata, 5) < 0)
1247 goto error;
1248
1249 if (pickler_write(self, PyBytes_AS_STRING(encoded), size) < 0)
1250 goto error;
1251 }
1252 else {
1253 const char unicode_op = UNICODE;
1254
1255 encoded = raw_unicode_escape(PyUnicode_AS_UNICODE(obj),
1256 PyUnicode_GET_SIZE(obj));
1257 if (encoded == NULL)
1258 goto error;
1259
1260 if (pickler_write(self, &unicode_op, 1) < 0)
1261 goto error;
1262
1263 size = PyBytes_GET_SIZE(encoded);
1264 if (pickler_write(self, PyBytes_AS_STRING(encoded), size) < 0)
1265 goto error;
1266
1267 if (pickler_write(self, "\n", 1) < 0)
1268 goto error;
1269 }
1270 if (memo_put(self, obj) < 0)
1271 goto error;
1272
1273 Py_DECREF(encoded);
1274 return 0;
1275
1276 error:
1277 Py_XDECREF(encoded);
1278 return -1;
1279}
1280
1281/* A helper for save_tuple. Push the len elements in tuple t on the stack. */
1282static int
1283store_tuple_elements(PicklerObject *self, PyObject *t, int len)
1284{
1285 int i;
1286
1287 assert(PyTuple_Size(t) == len);
1288
1289 for (i = 0; i < len; i++) {
1290 PyObject *element = PyTuple_GET_ITEM(t, i);
1291
1292 if (element == NULL)
1293 return -1;
1294 if (save(self, element, 0) < 0)
1295 return -1;
1296 }
1297
1298 return 0;
1299}
1300
1301/* Tuples are ubiquitous in the pickle protocols, so many techniques are
1302 * used across protocols to minimize the space needed to pickle them.
1303 * Tuples are also the only builtin immutable type that can be recursive
1304 * (a tuple can be reached from itself), and that requires some subtle
1305 * magic so that it works in all cases. IOW, this is a long routine.
1306 */
1307static int
1308save_tuple(PicklerObject *self, PyObject *obj)
1309{
1310 PyObject *memo_key = NULL;
1311 int len, i;
1312 int status = 0;
1313
1314 const char mark_op = MARK;
1315 const char tuple_op = TUPLE;
1316 const char pop_op = POP;
1317 const char pop_mark_op = POP_MARK;
1318 const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
1319
1320 if ((len = PyTuple_Size(obj)) < 0)
1321 return -1;
1322
1323 if (len == 0) {
1324 char pdata[2];
1325
1326 if (self->proto) {
1327 pdata[0] = EMPTY_TUPLE;
1328 len = 1;
1329 }
1330 else {
1331 pdata[0] = MARK;
1332 pdata[1] = TUPLE;
1333 len = 2;
1334 }
1335 if (pickler_write(self, pdata, len) < 0)
1336 return -1;
1337 return 0;
1338 }
1339
1340 /* id(tuple) isn't in the memo now. If it shows up there after
1341 * saving the tuple elements, the tuple must be recursive, in
1342 * which case we'll pop everything we put on the stack, and fetch
1343 * its value from the memo.
1344 */
1345 memo_key = PyLong_FromVoidPtr(obj);
1346 if (memo_key == NULL)
1347 return -1;
1348
1349 if (len <= 3 && self->proto >= 2) {
1350 /* Use TUPLE{1,2,3} opcodes. */
1351 if (store_tuple_elements(self, obj, len) < 0)
1352 goto error;
1353
1354 if (PyDict_GetItem(self->memo, memo_key)) {
1355 /* pop the len elements */
1356 for (i = 0; i < len; i++)
1357 if (pickler_write(self, &pop_op, 1) < 0)
1358 goto error;
1359 /* fetch from memo */
1360 if (memo_get(self, memo_key) < 0)
1361 goto error;
1362
1363 Py_DECREF(memo_key);
1364 return 0;
1365 }
1366 else { /* Not recursive. */
1367 if (pickler_write(self, len2opcode + len, 1) < 0)
1368 goto error;
1369 }
1370 goto memoize;
1371 }
1372
1373 /* proto < 2 and len > 0, or proto >= 2 and len > 3.
1374 * Generate MARK e1 e2 ... TUPLE
1375 */
1376 if (pickler_write(self, &mark_op, 1) < 0)
1377 goto error;
1378
1379 if (store_tuple_elements(self, obj, len) < 0)
1380 goto error;
1381
1382 if (PyDict_GetItem(self->memo, memo_key)) {
1383 /* pop the stack stuff we pushed */
1384 if (self->bin) {
1385 if (pickler_write(self, &pop_mark_op, 1) < 0)
1386 goto error;
1387 }
1388 else {
1389 /* Note that we pop one more than len, to remove
1390 * the MARK too.
1391 */
1392 for (i = 0; i <= len; i++)
1393 if (pickler_write(self, &pop_op, 1) < 0)
1394 goto error;
1395 }
1396 /* fetch from memo */
1397 if (memo_get(self, memo_key) < 0)
1398 goto error;
1399
1400 Py_DECREF(memo_key);
1401 return 0;
1402 }
1403 else { /* Not recursive. */
1404 if (pickler_write(self, &tuple_op, 1) < 0)
1405 goto error;
1406 }
1407
1408 memoize:
1409 if (memo_put(self, obj) < 0)
1410 goto error;
1411
1412 if (0) {
1413 error:
1414 status = -1;
1415 }
1416
1417 Py_DECREF(memo_key);
1418 return status;
1419}
1420
1421/* iter is an iterator giving items, and we batch up chunks of
1422 * MARK item item ... item APPENDS
1423 * opcode sequences. Calling code should have arranged to first create an
1424 * empty list, or list-like object, for the APPENDS to operate on.
1425 * Returns 0 on success, <0 on error.
1426 */
1427static int
1428batch_list(PicklerObject *self, PyObject *iter)
1429{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001430 PyObject *obj = NULL;
1431 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001432 int i, n;
1433
1434 const char mark_op = MARK;
1435 const char append_op = APPEND;
1436 const char appends_op = APPENDS;
1437
1438 assert(iter != NULL);
1439
1440 /* XXX: I think this function could be made faster by avoiding the
1441 iterator interface and fetching objects directly from list using
1442 PyList_GET_ITEM.
1443 */
1444
1445 if (self->proto == 0) {
1446 /* APPENDS isn't available; do one at a time. */
1447 for (;;) {
1448 obj = PyIter_Next(iter);
1449 if (obj == NULL) {
1450 if (PyErr_Occurred())
1451 return -1;
1452 break;
1453 }
1454 i = save(self, obj, 0);
1455 Py_DECREF(obj);
1456 if (i < 0)
1457 return -1;
1458 if (pickler_write(self, &append_op, 1) < 0)
1459 return -1;
1460 }
1461 return 0;
1462 }
1463
1464 /* proto > 0: write in batches of BATCHSIZE. */
1465 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001466 /* Get first item */
1467 firstitem = PyIter_Next(iter);
1468 if (firstitem == NULL) {
1469 if (PyErr_Occurred())
1470 goto error;
1471
1472 /* nothing more to add */
1473 break;
1474 }
1475
1476 /* Try to get a second item */
1477 obj = PyIter_Next(iter);
1478 if (obj == NULL) {
1479 if (PyErr_Occurred())
1480 goto error;
1481
1482 /* Only one item to write */
1483 if (save(self, firstitem, 0) < 0)
1484 goto error;
1485 if (pickler_write(self, &append_op, 1) < 0)
1486 goto error;
1487 Py_CLEAR(firstitem);
1488 break;
1489 }
1490
1491 /* More than one item to write */
1492
1493 /* Pump out MARK, items, APPENDS. */
1494 if (pickler_write(self, &mark_op, 1) < 0)
1495 goto error;
1496
1497 if (save(self, firstitem, 0) < 0)
1498 goto error;
1499 Py_CLEAR(firstitem);
1500 n = 1;
1501
1502 /* Fetch and save up to BATCHSIZE items */
1503 while (obj) {
1504 if (save(self, obj, 0) < 0)
1505 goto error;
1506 Py_CLEAR(obj);
1507 n += 1;
1508
1509 if (n == BATCHSIZE)
1510 break;
1511
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001512 obj = PyIter_Next(iter);
1513 if (obj == NULL) {
1514 if (PyErr_Occurred())
1515 goto error;
1516 break;
1517 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001518 }
1519
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001520 if (pickler_write(self, &appends_op, 1) < 0)
1521 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001522
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001523 } while (n == BATCHSIZE);
1524 return 0;
1525
1526 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001527 Py_XDECREF(firstitem);
1528 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001529 return -1;
1530}
1531
1532static int
1533save_list(PicklerObject *self, PyObject *obj)
1534{
1535 PyObject *iter;
1536 char header[3];
1537 int len;
1538 int status = 0;
1539
1540 if (self->fast && !fast_save_enter(self, obj))
1541 goto error;
1542
1543 /* Create an empty list. */
1544 if (self->bin) {
1545 header[0] = EMPTY_LIST;
1546 len = 1;
1547 }
1548 else {
1549 header[0] = MARK;
1550 header[1] = LIST;
1551 len = 2;
1552 }
1553
1554 if (pickler_write(self, header, len) < 0)
1555 goto error;
1556
1557 /* Get list length, and bow out early if empty. */
1558 if ((len = PyList_Size(obj)) < 0)
1559 goto error;
1560
1561 if (memo_put(self, obj) < 0)
1562 goto error;
1563
1564 if (len != 0) {
1565 /* Save the list elements. */
1566 iter = PyObject_GetIter(obj);
1567 if (iter == NULL)
1568 goto error;
1569 status = batch_list(self, iter);
1570 Py_DECREF(iter);
1571 }
1572
1573 if (0) {
1574 error:
1575 status = -1;
1576 }
1577
1578 if (self->fast && !fast_save_leave(self, obj))
1579 status = -1;
1580
1581 return status;
1582}
1583
1584/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
1585 * MARK key value ... key value SETITEMS
1586 * opcode sequences. Calling code should have arranged to first create an
1587 * empty dict, or dict-like object, for the SETITEMS to operate on.
1588 * Returns 0 on success, <0 on error.
1589 *
1590 * This is very much like batch_list(). The difference between saving
1591 * elements directly, and picking apart two-tuples, is so long-winded at
1592 * the C level, though, that attempts to combine these routines were too
1593 * ugly to bear.
1594 */
1595static int
1596batch_dict(PicklerObject *self, PyObject *iter)
1597{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001598 PyObject *obj = NULL;
1599 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001600 int i, n;
1601
1602 const char mark_op = MARK;
1603 const char setitem_op = SETITEM;
1604 const char setitems_op = SETITEMS;
1605
1606 assert(iter != NULL);
1607
1608 if (self->proto == 0) {
1609 /* SETITEMS isn't available; do one at a time. */
1610 for (;;) {
1611 obj = PyIter_Next(iter);
1612 if (obj == NULL) {
1613 if (PyErr_Occurred())
1614 return -1;
1615 break;
1616 }
1617 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
1618 PyErr_SetString(PyExc_TypeError, "dict items "
1619 "iterator must return 2-tuples");
1620 return -1;
1621 }
1622 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
1623 if (i >= 0)
1624 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
1625 Py_DECREF(obj);
1626 if (i < 0)
1627 return -1;
1628 if (pickler_write(self, &setitem_op, 1) < 0)
1629 return -1;
1630 }
1631 return 0;
1632 }
1633
1634 /* proto > 0: write in batches of BATCHSIZE. */
1635 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001636 /* Get first item */
1637 firstitem = PyIter_Next(iter);
1638 if (firstitem == NULL) {
1639 if (PyErr_Occurred())
1640 goto error;
1641
1642 /* nothing more to add */
1643 break;
1644 }
1645 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
1646 PyErr_SetString(PyExc_TypeError, "dict items "
1647 "iterator must return 2-tuples");
1648 goto error;
1649 }
1650
1651 /* Try to get a second item */
1652 obj = PyIter_Next(iter);
1653 if (obj == NULL) {
1654 if (PyErr_Occurred())
1655 goto error;
1656
1657 /* Only one item to write */
1658 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
1659 goto error;
1660 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
1661 goto error;
1662 if (pickler_write(self, &setitem_op, 1) < 0)
1663 goto error;
1664 Py_CLEAR(firstitem);
1665 break;
1666 }
1667
1668 /* More than one item to write */
1669
1670 /* Pump out MARK, items, SETITEMS. */
1671 if (pickler_write(self, &mark_op, 1) < 0)
1672 goto error;
1673
1674 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
1675 goto error;
1676 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
1677 goto error;
1678 Py_CLEAR(firstitem);
1679 n = 1;
1680
1681 /* Fetch and save up to BATCHSIZE items */
1682 while (obj) {
1683 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
1684 PyErr_SetString(PyExc_TypeError, "dict items "
1685 "iterator must return 2-tuples");
1686 goto error;
1687 }
1688 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
1689 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
1690 goto error;
1691 Py_CLEAR(obj);
1692 n += 1;
1693
1694 if (n == BATCHSIZE)
1695 break;
1696
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001697 obj = PyIter_Next(iter);
1698 if (obj == NULL) {
1699 if (PyErr_Occurred())
1700 goto error;
1701 break;
1702 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001703 }
1704
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001705 if (pickler_write(self, &setitems_op, 1) < 0)
1706 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001707
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001708 } while (n == BATCHSIZE);
1709 return 0;
1710
1711 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001712 Py_XDECREF(firstitem);
1713 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001714 return -1;
1715}
1716
Collin Winter5c9b02d2009-05-25 05:43:30 +00001717/* This is a variant of batch_dict() above that specializes for dicts, with no
1718 * support for dict subclasses. Like batch_dict(), we batch up chunks of
1719 * MARK key value ... key value SETITEMS
1720 * opcode sequences. Calling code should have arranged to first create an
1721 * empty dict, or dict-like object, for the SETITEMS to operate on.
1722 * Returns 0 on success, -1 on error.
1723 *
1724 * Note that this currently doesn't work for protocol 0.
1725 */
1726static int
1727batch_dict_exact(PicklerObject *self, PyObject *obj)
1728{
1729 PyObject *key = NULL, *value = NULL;
1730 int i;
1731 Py_ssize_t dict_size, ppos = 0;
1732
Alexandre Vassalottif70b1292009-05-25 18:00:52 +00001733 const char mark_op = MARK;
1734 const char setitem_op = SETITEM;
1735 const char setitems_op = SETITEMS;
Collin Winter5c9b02d2009-05-25 05:43:30 +00001736
1737 assert(obj != NULL);
1738 assert(self->proto > 0);
1739
1740 dict_size = PyDict_Size(obj);
1741
1742 /* Special-case len(d) == 1 to save space. */
1743 if (dict_size == 1) {
1744 PyDict_Next(obj, &ppos, &key, &value);
1745 if (save(self, key, 0) < 0)
1746 return -1;
1747 if (save(self, value, 0) < 0)
1748 return -1;
Alexandre Vassalottif70b1292009-05-25 18:00:52 +00001749 if (pickler_write(self, &setitem_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00001750 return -1;
1751 return 0;
1752 }
1753
1754 /* Write in batches of BATCHSIZE. */
1755 do {
1756 i = 0;
1757 if (pickler_write(self, &mark_op, 1) < 0)
1758 return -1;
1759 while (PyDict_Next(obj, &ppos, &key, &value)) {
1760 if (save(self, key, 0) < 0)
1761 return -1;
1762 if (save(self, value, 0) < 0)
1763 return -1;
1764 if (++i == BATCHSIZE)
1765 break;
1766 }
Alexandre Vassalottif70b1292009-05-25 18:00:52 +00001767 if (pickler_write(self, &setitems_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00001768 return -1;
1769 if (PyDict_Size(obj) != dict_size) {
1770 PyErr_Format(
1771 PyExc_RuntimeError,
1772 "dictionary changed size during iteration");
1773 return -1;
1774 }
1775
1776 } while (i == BATCHSIZE);
1777 return 0;
1778}
1779
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001780static int
1781save_dict(PicklerObject *self, PyObject *obj)
1782{
1783 PyObject *items, *iter;
1784 char header[3];
1785 int len;
1786 int status = 0;
1787
1788 if (self->fast && !fast_save_enter(self, obj))
1789 goto error;
1790
1791 /* Create an empty dict. */
1792 if (self->bin) {
1793 header[0] = EMPTY_DICT;
1794 len = 1;
1795 }
1796 else {
1797 header[0] = MARK;
1798 header[1] = DICT;
1799 len = 2;
1800 }
1801
1802 if (pickler_write(self, header, len) < 0)
1803 goto error;
1804
1805 /* Get dict size, and bow out early if empty. */
1806 if ((len = PyDict_Size(obj)) < 0)
1807 goto error;
1808
1809 if (memo_put(self, obj) < 0)
1810 goto error;
1811
1812 if (len != 0) {
1813 /* Save the dict items. */
Collin Winter5c9b02d2009-05-25 05:43:30 +00001814 if (PyDict_CheckExact(obj) && self->proto > 0) {
1815 /* We can take certain shortcuts if we know this is a dict and
1816 not a dict subclass. */
1817 if (Py_EnterRecursiveCall(" while pickling an object") == 0) {
1818 status = batch_dict_exact(self, obj);
1819 Py_LeaveRecursiveCall();
1820 }
1821 } else {
1822 items = PyObject_CallMethod(obj, "items", "()");
1823 if (items == NULL)
1824 goto error;
1825 iter = PyObject_GetIter(items);
1826 Py_DECREF(items);
1827 if (iter == NULL)
1828 goto error;
1829 status = batch_dict(self, iter);
1830 Py_DECREF(iter);
1831 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001832 }
1833
1834 if (0) {
1835 error:
1836 status = -1;
1837 }
1838
1839 if (self->fast && !fast_save_leave(self, obj))
1840 status = -1;
1841
1842 return status;
1843}
1844
1845static int
1846save_global(PicklerObject *self, PyObject *obj, PyObject *name)
1847{
1848 static PyObject *name_str = NULL;
1849 PyObject *global_name = NULL;
1850 PyObject *module_name = NULL;
1851 PyObject *module = NULL;
1852 PyObject *cls;
1853 int status = 0;
1854
1855 const char global_op = GLOBAL;
1856
1857 if (name_str == NULL) {
1858 name_str = PyUnicode_InternFromString("__name__");
1859 if (name_str == NULL)
1860 goto error;
1861 }
1862
1863 if (name) {
1864 global_name = name;
1865 Py_INCREF(global_name);
1866 }
1867 else {
1868 global_name = PyObject_GetAttr(obj, name_str);
1869 if (global_name == NULL)
1870 goto error;
1871 }
1872
1873 module_name = whichmodule(obj, global_name);
1874 if (module_name == NULL)
1875 goto error;
1876
1877 /* XXX: Change to use the import C API directly with level=0 to disallow
1878 relative imports.
1879
1880 XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
1881 builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
1882 custom import functions (IMHO, this would be a nice security
1883 feature). The import C API would need to be extended to support the
1884 extra parameters of __import__ to fix that. */
1885 module = PyImport_Import(module_name);
1886 if (module == NULL) {
1887 PyErr_Format(PicklingError,
1888 "Can't pickle %R: import of module %R failed",
1889 obj, module_name);
1890 goto error;
1891 }
1892 cls = PyObject_GetAttr(module, global_name);
1893 if (cls == NULL) {
1894 PyErr_Format(PicklingError,
1895 "Can't pickle %R: attribute lookup %S.%S failed",
1896 obj, module_name, global_name);
1897 goto error;
1898 }
1899 if (cls != obj) {
1900 Py_DECREF(cls);
1901 PyErr_Format(PicklingError,
1902 "Can't pickle %R: it's not the same object as %S.%S",
1903 obj, module_name, global_name);
1904 goto error;
1905 }
1906 Py_DECREF(cls);
1907
1908 if (self->proto >= 2) {
1909 /* See whether this is in the extension registry, and if
1910 * so generate an EXT opcode.
1911 */
1912 PyObject *code_obj; /* extension code as Python object */
1913 long code; /* extension code as C value */
1914 char pdata[5];
1915 int n;
1916
1917 PyTuple_SET_ITEM(two_tuple, 0, module_name);
1918 PyTuple_SET_ITEM(two_tuple, 1, global_name);
1919 code_obj = PyDict_GetItem(extension_registry, two_tuple);
1920 /* The object is not registered in the extension registry.
1921 This is the most likely code path. */
1922 if (code_obj == NULL)
1923 goto gen_global;
1924
1925 /* XXX: pickle.py doesn't check neither the type, nor the range
1926 of the value returned by the extension_registry. It should for
1927 consistency. */
1928
1929 /* Verify code_obj has the right type and value. */
1930 if (!PyLong_Check(code_obj)) {
1931 PyErr_Format(PicklingError,
1932 "Can't pickle %R: extension code %R isn't an integer",
1933 obj, code_obj);
1934 goto error;
1935 }
1936 code = PyLong_AS_LONG(code_obj);
1937 if (code <= 0 || code > 0x7fffffffL) {
1938 PyErr_Format(PicklingError,
1939 "Can't pickle %R: extension code %ld is out of range",
1940 obj, code);
1941 goto error;
1942 }
1943
1944 /* Generate an EXT opcode. */
1945 if (code <= 0xff) {
1946 pdata[0] = EXT1;
1947 pdata[1] = (unsigned char)code;
1948 n = 2;
1949 }
1950 else if (code <= 0xffff) {
1951 pdata[0] = EXT2;
1952 pdata[1] = (unsigned char)(code & 0xff);
1953 pdata[2] = (unsigned char)((code >> 8) & 0xff);
1954 n = 3;
1955 }
1956 else {
1957 pdata[0] = EXT4;
1958 pdata[1] = (unsigned char)(code & 0xff);
1959 pdata[2] = (unsigned char)((code >> 8) & 0xff);
1960 pdata[3] = (unsigned char)((code >> 16) & 0xff);
1961 pdata[4] = (unsigned char)((code >> 24) & 0xff);
1962 n = 5;
1963 }
1964
1965 if (pickler_write(self, pdata, n) < 0)
1966 goto error;
1967 }
1968 else {
1969 /* Generate a normal global opcode if we are using a pickle
1970 protocol <= 2, or if the object is not registered in the
1971 extension registry. */
1972 PyObject *encoded;
1973 PyObject *(*unicode_encoder)(PyObject *);
1974
1975 gen_global:
1976 if (pickler_write(self, &global_op, 1) < 0)
1977 goto error;
1978
1979 /* Since Python 3.0 now supports non-ASCII identifiers, we encode both
1980 the module name and the global name using UTF-8. We do so only when
1981 we are using the pickle protocol newer than version 3. This is to
1982 ensure compatibility with older Unpickler running on Python 2.x. */
1983 if (self->proto >= 3) {
1984 unicode_encoder = PyUnicode_AsUTF8String;
1985 }
1986 else {
1987 unicode_encoder = PyUnicode_AsASCIIString;
1988 }
1989
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00001990 /* For protocol < 3 and if the user didn't request against doing so,
1991 we convert module names to the old 2.x module names. */
1992 if (self->fix_imports) {
1993 PyObject *key;
1994 PyObject *item;
1995
1996 key = PyTuple_Pack(2, module_name, global_name);
1997 if (key == NULL)
1998 goto error;
1999 item = PyDict_GetItemWithError(name_mapping_3to2, key);
2000 Py_DECREF(key);
2001 if (item) {
2002 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
2003 PyErr_Format(PyExc_RuntimeError,
2004 "_compat_pickle.REVERSE_NAME_MAPPING values "
2005 "should be 2-tuples, not %.200s",
2006 Py_TYPE(item)->tp_name);
2007 goto error;
2008 }
2009 Py_CLEAR(module_name);
2010 Py_CLEAR(global_name);
2011 module_name = PyTuple_GET_ITEM(item, 0);
2012 global_name = PyTuple_GET_ITEM(item, 1);
2013 if (!PyUnicode_Check(module_name) ||
2014 !PyUnicode_Check(global_name)) {
2015 PyErr_Format(PyExc_RuntimeError,
2016 "_compat_pickle.REVERSE_NAME_MAPPING values "
2017 "should be pairs of str, not (%.200s, %.200s)",
2018 Py_TYPE(module_name)->tp_name,
2019 Py_TYPE(global_name)->tp_name);
2020 goto error;
2021 }
2022 Py_INCREF(module_name);
2023 Py_INCREF(global_name);
2024 }
2025 else if (PyErr_Occurred()) {
2026 goto error;
2027 }
2028
2029 item = PyDict_GetItemWithError(import_mapping_3to2, module_name);
2030 if (item) {
2031 if (!PyUnicode_Check(item)) {
2032 PyErr_Format(PyExc_RuntimeError,
2033 "_compat_pickle.REVERSE_IMPORT_MAPPING values "
2034 "should be strings, not %.200s",
2035 Py_TYPE(item)->tp_name);
2036 goto error;
2037 }
2038 Py_CLEAR(module_name);
2039 module_name = item;
2040 Py_INCREF(module_name);
2041 }
2042 else if (PyErr_Occurred()) {
2043 goto error;
2044 }
2045 }
2046
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002047 /* Save the name of the module. */
2048 encoded = unicode_encoder(module_name);
2049 if (encoded == NULL) {
2050 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
2051 PyErr_Format(PicklingError,
2052 "can't pickle module identifier '%S' using "
2053 "pickle protocol %i", module_name, self->proto);
2054 goto error;
2055 }
2056 if (pickler_write(self, PyBytes_AS_STRING(encoded),
2057 PyBytes_GET_SIZE(encoded)) < 0) {
2058 Py_DECREF(encoded);
2059 goto error;
2060 }
2061 Py_DECREF(encoded);
2062 if(pickler_write(self, "\n", 1) < 0)
2063 goto error;
2064
2065 /* Save the name of the module. */
2066 encoded = unicode_encoder(global_name);
2067 if (encoded == NULL) {
2068 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
2069 PyErr_Format(PicklingError,
2070 "can't pickle global identifier '%S' using "
2071 "pickle protocol %i", global_name, self->proto);
2072 goto error;
2073 }
2074 if (pickler_write(self, PyBytes_AS_STRING(encoded),
2075 PyBytes_GET_SIZE(encoded)) < 0) {
2076 Py_DECREF(encoded);
2077 goto error;
2078 }
2079 Py_DECREF(encoded);
2080 if(pickler_write(self, "\n", 1) < 0)
2081 goto error;
2082
2083 /* Memoize the object. */
2084 if (memo_put(self, obj) < 0)
2085 goto error;
2086 }
2087
2088 if (0) {
2089 error:
2090 status = -1;
2091 }
2092 Py_XDECREF(module_name);
2093 Py_XDECREF(global_name);
2094 Py_XDECREF(module);
2095
2096 return status;
2097}
2098
2099static int
2100save_pers(PicklerObject *self, PyObject *obj, PyObject *func)
2101{
2102 PyObject *pid = NULL;
2103 int status = 0;
2104
2105 const char persid_op = PERSID;
2106 const char binpersid_op = BINPERSID;
2107
2108 Py_INCREF(obj);
2109 pid = pickler_call(self, func, obj);
2110 if (pid == NULL)
2111 return -1;
2112
2113 if (pid != Py_None) {
2114 if (self->bin) {
2115 if (save(self, pid, 1) < 0 ||
2116 pickler_write(self, &binpersid_op, 1) < 0)
2117 goto error;
2118 }
2119 else {
2120 PyObject *pid_str = NULL;
2121 char *pid_ascii_bytes;
2122 Py_ssize_t size;
2123
2124 pid_str = PyObject_Str(pid);
2125 if (pid_str == NULL)
2126 goto error;
2127
2128 /* XXX: Should it check whether the persistent id only contains
2129 ASCII characters? And what if the pid contains embedded
2130 newlines? */
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00002131 pid_ascii_bytes = _PyUnicode_AsStringAndSize(pid_str, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002132 Py_DECREF(pid_str);
2133 if (pid_ascii_bytes == NULL)
2134 goto error;
2135
2136 if (pickler_write(self, &persid_op, 1) < 0 ||
2137 pickler_write(self, pid_ascii_bytes, size) < 0 ||
2138 pickler_write(self, "\n", 1) < 0)
2139 goto error;
2140 }
2141 status = 1;
2142 }
2143
2144 if (0) {
2145 error:
2146 status = -1;
2147 }
2148 Py_XDECREF(pid);
2149
2150 return status;
2151}
2152
2153/* We're saving obj, and args is the 2-thru-5 tuple returned by the
2154 * appropriate __reduce__ method for obj.
2155 */
2156static int
2157save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
2158{
2159 PyObject *callable;
2160 PyObject *argtup;
2161 PyObject *state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002162 PyObject *listitems = Py_None;
2163 PyObject *dictitems = Py_None;
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002164 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002165
2166 int use_newobj = self->proto >= 2;
2167
2168 const char reduce_op = REDUCE;
2169 const char build_op = BUILD;
2170 const char newobj_op = NEWOBJ;
2171
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002172 size = PyTuple_Size(args);
2173 if (size < 2 || size > 5) {
2174 PyErr_SetString(PicklingError, "tuple returned by "
2175 "__reduce__ must contain 2 through 5 elements");
2176 return -1;
2177 }
2178
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002179 if (!PyArg_UnpackTuple(args, "save_reduce", 2, 5,
2180 &callable, &argtup, &state, &listitems, &dictitems))
2181 return -1;
2182
2183 if (!PyCallable_Check(callable)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002184 PyErr_SetString(PicklingError, "first item of the tuple "
2185 "returned by __reduce__ must be callable");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002186 return -1;
2187 }
2188 if (!PyTuple_Check(argtup)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002189 PyErr_SetString(PicklingError, "second item of the tuple "
2190 "returned by __reduce__ must be a tuple");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002191 return -1;
2192 }
2193
2194 if (state == Py_None)
2195 state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002196
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002197 if (listitems == Py_None)
2198 listitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002199 else if (!PyIter_Check(listitems)) {
2200 PyErr_Format(PicklingError, "Fourth element of tuple"
2201 "returned by __reduce__ must be an iterator, not %s",
2202 Py_TYPE(listitems)->tp_name);
2203 return -1;
2204 }
2205
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002206 if (dictitems == Py_None)
2207 dictitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002208 else if (!PyIter_Check(dictitems)) {
2209 PyErr_Format(PicklingError, "Fifth element of tuple"
2210 "returned by __reduce__ must be an iterator, not %s",
2211 Py_TYPE(dictitems)->tp_name);
2212 return -1;
2213 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002214
2215 /* Protocol 2 special case: if callable's name is __newobj__, use
2216 NEWOBJ. */
2217 if (use_newobj) {
2218 static PyObject *newobj_str = NULL;
2219 PyObject *name_str;
2220
2221 if (newobj_str == NULL) {
2222 newobj_str = PyUnicode_InternFromString("__newobj__");
2223 }
2224
2225 name_str = PyObject_GetAttrString(callable, "__name__");
2226 if (name_str == NULL) {
2227 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2228 PyErr_Clear();
2229 else
2230 return -1;
2231 use_newobj = 0;
2232 }
2233 else {
2234 use_newobj = PyUnicode_Check(name_str) &&
2235 PyUnicode_Compare(name_str, newobj_str) == 0;
2236 Py_DECREF(name_str);
2237 }
2238 }
2239 if (use_newobj) {
2240 PyObject *cls;
2241 PyObject *newargtup;
2242 PyObject *obj_class;
2243 int p;
2244
2245 /* Sanity checks. */
2246 if (Py_SIZE(argtup) < 1) {
2247 PyErr_SetString(PicklingError, "__newobj__ arglist is empty");
2248 return -1;
2249 }
2250
2251 cls = PyTuple_GET_ITEM(argtup, 0);
2252 if (!PyObject_HasAttrString(cls, "__new__")) {
2253 PyErr_SetString(PicklingError, "args[0] from "
2254 "__newobj__ args has no __new__");
2255 return -1;
2256 }
2257
2258 if (obj != NULL) {
2259 obj_class = PyObject_GetAttrString(obj, "__class__");
2260 if (obj_class == NULL) {
2261 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2262 PyErr_Clear();
2263 else
2264 return -1;
2265 }
2266 p = obj_class != cls; /* true iff a problem */
2267 Py_DECREF(obj_class);
2268 if (p) {
2269 PyErr_SetString(PicklingError, "args[0] from "
2270 "__newobj__ args has the wrong class");
2271 return -1;
2272 }
2273 }
2274 /* XXX: These calls save() are prone to infinite recursion. Imagine
2275 what happen if the value returned by the __reduce__() method of
2276 some extension type contains another object of the same type. Ouch!
2277
2278 Here is a quick example, that I ran into, to illustrate what I
2279 mean:
2280
2281 >>> import pickle, copyreg
2282 >>> copyreg.dispatch_table.pop(complex)
2283 >>> pickle.dumps(1+2j)
2284 Traceback (most recent call last):
2285 ...
2286 RuntimeError: maximum recursion depth exceeded
2287
2288 Removing the complex class from copyreg.dispatch_table made the
2289 __reduce_ex__() method emit another complex object:
2290
2291 >>> (1+1j).__reduce_ex__(2)
2292 (<function __newobj__ at 0xb7b71c3c>,
2293 (<class 'complex'>, (1+1j)), None, None, None)
2294
2295 Thus when save() was called on newargstup (the 2nd item) recursion
2296 ensued. Of course, the bug was in the complex class which had a
2297 broken __getnewargs__() that emitted another complex object. But,
2298 the point, here, is it is quite easy to end up with a broken reduce
2299 function. */
2300
2301 /* Save the class and its __new__ arguments. */
2302 if (save(self, cls, 0) < 0)
2303 return -1;
2304
2305 newargtup = PyTuple_GetSlice(argtup, 1, Py_SIZE(argtup));
2306 if (newargtup == NULL)
2307 return -1;
2308
2309 p = save(self, newargtup, 0);
2310 Py_DECREF(newargtup);
2311 if (p < 0)
2312 return -1;
2313
2314 /* Add NEWOBJ opcode. */
2315 if (pickler_write(self, &newobj_op, 1) < 0)
2316 return -1;
2317 }
2318 else { /* Not using NEWOBJ. */
2319 if (save(self, callable, 0) < 0 ||
2320 save(self, argtup, 0) < 0 ||
2321 pickler_write(self, &reduce_op, 1) < 0)
2322 return -1;
2323 }
2324
2325 /* obj can be NULL when save_reduce() is used directly. A NULL obj means
2326 the caller do not want to memoize the object. Not particularly useful,
2327 but that is to mimic the behavior save_reduce() in pickle.py when
2328 obj is None. */
2329 if (obj && memo_put(self, obj) < 0)
2330 return -1;
2331
2332 if (listitems && batch_list(self, listitems) < 0)
2333 return -1;
2334
2335 if (dictitems && batch_dict(self, dictitems) < 0)
2336 return -1;
2337
2338 if (state) {
2339 if (save(self, state, 0) < 0 ||
2340 pickler_write(self, &build_op, 1) < 0)
2341 return -1;
2342 }
2343
2344 return 0;
2345}
2346
2347static int
2348save(PicklerObject *self, PyObject *obj, int pers_save)
2349{
2350 PyTypeObject *type;
2351 PyObject *reduce_func = NULL;
2352 PyObject *reduce_value = NULL;
2353 PyObject *memo_key = NULL;
2354 int status = 0;
2355
Alexandre Vassalottidff18342008-07-13 18:48:30 +00002356 if (Py_EnterRecursiveCall(" while pickling an object") < 0)
2357 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002358
2359 /* The extra pers_save argument is necessary to avoid calling save_pers()
2360 on its returned object. */
2361 if (!pers_save && self->pers_func) {
2362 /* save_pers() returns:
2363 -1 to signal an error;
2364 0 if it did nothing successfully;
2365 1 if a persistent id was saved.
2366 */
2367 if ((status = save_pers(self, obj, self->pers_func)) != 0)
2368 goto done;
2369 }
2370
2371 type = Py_TYPE(obj);
2372
2373 /* XXX: The old cPickle had an optimization that used switch-case
2374 statement dispatching on the first letter of the type name. It was
2375 probably not a bad idea after all. If benchmarks shows that particular
2376 optimization had some real benefits, it would be nice to add it
2377 back. */
2378
2379 /* Atom types; these aren't memoized, so don't check the memo. */
2380
2381 if (obj == Py_None) {
2382 status = save_none(self, obj);
2383 goto done;
2384 }
2385 else if (obj == Py_False || obj == Py_True) {
2386 status = save_bool(self, obj);
2387 goto done;
2388 }
2389 else if (type == &PyLong_Type) {
2390 status = save_long(self, obj);
2391 goto done;
2392 }
2393 else if (type == &PyFloat_Type) {
2394 status = save_float(self, obj);
2395 goto done;
2396 }
2397
2398 /* Check the memo to see if it has the object. If so, generate
2399 a GET (or BINGET) opcode, instead of pickling the object
2400 once again. */
2401 memo_key = PyLong_FromVoidPtr(obj);
2402 if (memo_key == NULL)
2403 goto error;
2404 if (PyDict_GetItem(self->memo, memo_key)) {
2405 if (memo_get(self, memo_key) < 0)
2406 goto error;
2407 goto done;
2408 }
2409
2410 if (type == &PyBytes_Type) {
2411 status = save_bytes(self, obj);
2412 goto done;
2413 }
2414 else if (type == &PyUnicode_Type) {
2415 status = save_unicode(self, obj);
2416 goto done;
2417 }
2418 else if (type == &PyDict_Type) {
2419 status = save_dict(self, obj);
2420 goto done;
2421 }
2422 else if (type == &PyList_Type) {
2423 status = save_list(self, obj);
2424 goto done;
2425 }
2426 else if (type == &PyTuple_Type) {
2427 status = save_tuple(self, obj);
2428 goto done;
2429 }
2430 else if (type == &PyType_Type) {
2431 status = save_global(self, obj, NULL);
2432 goto done;
2433 }
2434 else if (type == &PyFunction_Type) {
2435 status = save_global(self, obj, NULL);
2436 if (status < 0 && PyErr_ExceptionMatches(PickleError)) {
2437 /* fall back to reduce */
2438 PyErr_Clear();
2439 }
2440 else {
2441 goto done;
2442 }
2443 }
2444 else if (type == &PyCFunction_Type) {
2445 status = save_global(self, obj, NULL);
2446 goto done;
2447 }
2448 else if (PyType_IsSubtype(type, &PyType_Type)) {
2449 status = save_global(self, obj, NULL);
2450 goto done;
2451 }
2452
2453 /* XXX: This part needs some unit tests. */
2454
2455 /* Get a reduction callable, and call it. This may come from
2456 * copyreg.dispatch_table, the object's __reduce_ex__ method,
2457 * or the object's __reduce__ method.
2458 */
2459 reduce_func = PyDict_GetItem(dispatch_table, (PyObject *)type);
2460 if (reduce_func != NULL) {
2461 /* Here, the reference count of the reduce_func object returned by
2462 PyDict_GetItem needs to be increased to be consistent with the one
2463 returned by PyObject_GetAttr. This is allow us to blindly DECREF
2464 reduce_func at the end of the save() routine.
2465 */
2466 Py_INCREF(reduce_func);
2467 Py_INCREF(obj);
2468 reduce_value = pickler_call(self, reduce_func, obj);
2469 }
2470 else {
2471 static PyObject *reduce_str = NULL;
2472 static PyObject *reduce_ex_str = NULL;
2473
2474 /* Cache the name of the reduce methods. */
2475 if (reduce_str == NULL) {
2476 reduce_str = PyUnicode_InternFromString("__reduce__");
2477 if (reduce_str == NULL)
2478 goto error;
2479 reduce_ex_str = PyUnicode_InternFromString("__reduce_ex__");
2480 if (reduce_ex_str == NULL)
2481 goto error;
2482 }
2483
2484 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
2485 automatically defined as __reduce__. While this is convenient, this
2486 make it impossible to know which method was actually called. Of
2487 course, this is not a big deal. But still, it would be nice to let
2488 the user know which method was called when something go
2489 wrong. Incidentally, this means if __reduce_ex__ is not defined, we
2490 don't actually have to check for a __reduce__ method. */
2491
2492 /* Check for a __reduce_ex__ method. */
2493 reduce_func = PyObject_GetAttr(obj, reduce_ex_str);
2494 if (reduce_func != NULL) {
2495 PyObject *proto;
2496 proto = PyLong_FromLong(self->proto);
2497 if (proto != NULL) {
2498 reduce_value = pickler_call(self, reduce_func, proto);
2499 }
2500 }
2501 else {
2502 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2503 PyErr_Clear();
2504 else
2505 goto error;
2506 /* Check for a __reduce__ method. */
2507 reduce_func = PyObject_GetAttr(obj, reduce_str);
2508 if (reduce_func != NULL) {
2509 reduce_value = PyObject_Call(reduce_func, empty_tuple, NULL);
2510 }
2511 else {
2512 PyErr_Format(PicklingError, "can't pickle '%.200s' object: %R",
2513 type->tp_name, obj);
2514 goto error;
2515 }
2516 }
2517 }
2518
2519 if (reduce_value == NULL)
2520 goto error;
2521
2522 if (PyUnicode_Check(reduce_value)) {
2523 status = save_global(self, obj, reduce_value);
2524 goto done;
2525 }
2526
2527 if (!PyTuple_Check(reduce_value)) {
2528 PyErr_SetString(PicklingError,
2529 "__reduce__ must return a string or tuple");
2530 goto error;
2531 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002532
2533 status = save_reduce(self, reduce_value, obj);
2534
2535 if (0) {
2536 error:
2537 status = -1;
2538 }
2539 done:
Alexandre Vassalottidff18342008-07-13 18:48:30 +00002540 Py_LeaveRecursiveCall();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002541 Py_XDECREF(memo_key);
2542 Py_XDECREF(reduce_func);
2543 Py_XDECREF(reduce_value);
2544
2545 return status;
2546}
2547
2548static int
2549dump(PicklerObject *self, PyObject *obj)
2550{
2551 const char stop_op = STOP;
2552
2553 if (self->proto >= 2) {
2554 char header[2];
2555
2556 header[0] = PROTO;
2557 assert(self->proto >= 0 && self->proto < 256);
2558 header[1] = (unsigned char)self->proto;
2559 if (pickler_write(self, header, 2) < 0)
2560 return -1;
2561 }
2562
2563 if (save(self, obj, 0) < 0 ||
2564 pickler_write(self, &stop_op, 1) < 0 ||
2565 pickler_write(self, NULL, 0) < 0)
2566 return -1;
2567
2568 return 0;
2569}
2570
2571PyDoc_STRVAR(Pickler_clear_memo_doc,
2572"clear_memo() -> None. Clears the pickler's \"memo\"."
2573"\n"
2574"The memo is the data structure that remembers which objects the\n"
2575"pickler has already seen, so that shared or recursive objects are\n"
2576"pickled by reference and not by value. This method is useful when\n"
2577"re-using picklers.");
2578
2579static PyObject *
2580Pickler_clear_memo(PicklerObject *self)
2581{
2582 if (self->memo)
2583 PyDict_Clear(self->memo);
2584
2585 Py_RETURN_NONE;
2586}
2587
2588PyDoc_STRVAR(Pickler_dump_doc,
2589"dump(obj) -> None. Write a pickled representation of obj to the open file.");
2590
2591static PyObject *
2592Pickler_dump(PicklerObject *self, PyObject *args)
2593{
2594 PyObject *obj;
2595
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00002596 /* Check whether the Pickler was initialized correctly (issue3664).
2597 Developers often forget to call __init__() in their subclasses, which
2598 would trigger a segfault without this check. */
2599 if (self->write == NULL) {
2600 PyErr_Format(PicklingError,
2601 "Pickler.__init__() was not called by %s.__init__()",
2602 Py_TYPE(self)->tp_name);
2603 return NULL;
2604 }
2605
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002606 if (!PyArg_ParseTuple(args, "O:dump", &obj))
2607 return NULL;
2608
2609 if (dump(self, obj) < 0)
2610 return NULL;
2611
2612 Py_RETURN_NONE;
2613}
2614
2615static struct PyMethodDef Pickler_methods[] = {
2616 {"dump", (PyCFunction)Pickler_dump, METH_VARARGS,
2617 Pickler_dump_doc},
2618 {"clear_memo", (PyCFunction)Pickler_clear_memo, METH_NOARGS,
2619 Pickler_clear_memo_doc},
2620 {NULL, NULL} /* sentinel */
2621};
2622
2623static void
2624Pickler_dealloc(PicklerObject *self)
2625{
2626 PyObject_GC_UnTrack(self);
2627
2628 Py_XDECREF(self->write);
2629 Py_XDECREF(self->memo);
2630 Py_XDECREF(self->pers_func);
2631 Py_XDECREF(self->arg);
2632 Py_XDECREF(self->fast_memo);
2633
2634 PyMem_Free(self->write_buf);
2635
2636 Py_TYPE(self)->tp_free((PyObject *)self);
2637}
2638
2639static int
2640Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
2641{
2642 Py_VISIT(self->write);
2643 Py_VISIT(self->memo);
2644 Py_VISIT(self->pers_func);
2645 Py_VISIT(self->arg);
2646 Py_VISIT(self->fast_memo);
2647 return 0;
2648}
2649
2650static int
2651Pickler_clear(PicklerObject *self)
2652{
2653 Py_CLEAR(self->write);
2654 Py_CLEAR(self->memo);
2655 Py_CLEAR(self->pers_func);
2656 Py_CLEAR(self->arg);
2657 Py_CLEAR(self->fast_memo);
2658
2659 PyMem_Free(self->write_buf);
2660 self->write_buf = NULL;
2661
2662 return 0;
2663}
2664
2665PyDoc_STRVAR(Pickler_doc,
2666"Pickler(file, protocol=None)"
2667"\n"
2668"This takes a binary file for writing a pickle data stream.\n"
2669"\n"
2670"The optional protocol argument tells the pickler to use the\n"
2671"given protocol; supported protocols are 0, 1, 2, 3. The default\n"
2672"protocol is 3; a backward-incompatible protocol designed for\n"
2673"Python 3.0.\n"
2674"\n"
2675"Specifying a negative protocol version selects the highest\n"
2676"protocol version supported. The higher the protocol used, the\n"
2677"more recent the version of Python needed to read the pickle\n"
2678"produced.\n"
2679"\n"
2680"The file argument must have a write() method that accepts a single\n"
2681"bytes argument. It can thus be a file object opened for binary\n"
2682"writing, a io.BytesIO instance, or any other custom object that\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00002683"meets this interface.\n"
2684"\n"
2685"If fix_imports is True and protocol is less than 3, pickle will try to\n"
2686"map the new Python 3.x names to the old module names used in Python\n"
2687"2.x, so that the pickle data stream is readable with Python 2.x.\n");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002688
2689static int
2690Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
2691{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00002692 static char *kwlist[] = {"file", "protocol", "fix_imports", 0};
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002693 PyObject *file;
2694 PyObject *proto_obj = NULL;
2695 long proto = 0;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00002696 int fix_imports = 1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002697
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00002698 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oi:Pickler",
2699 kwlist, &file, &proto_obj, &fix_imports))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002700 return -1;
2701
2702 /* In case of multiple __init__() calls, clear previous content. */
2703 if (self->write != NULL)
2704 (void)Pickler_clear(self);
2705
2706 if (proto_obj == NULL || proto_obj == Py_None)
2707 proto = DEFAULT_PROTOCOL;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00002708 else {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002709 proto = PyLong_AsLong(proto_obj);
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00002710 if (proto == -1 && PyErr_Occurred())
2711 return -1;
2712 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002713
2714 if (proto < 0)
2715 proto = HIGHEST_PROTOCOL;
2716 if (proto > HIGHEST_PROTOCOL) {
2717 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
2718 HIGHEST_PROTOCOL);
2719 return -1;
2720 }
2721
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00002722 self->proto = proto;
2723 self->bin = proto > 0;
2724 self->arg = NULL;
2725 self->fast = 0;
2726 self->fast_nesting = 0;
2727 self->fast_memo = NULL;
2728 self->fix_imports = fix_imports && proto < 3;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002729
2730 if (!PyObject_HasAttrString(file, "write")) {
2731 PyErr_SetString(PyExc_TypeError,
2732 "file must have a 'write' attribute");
2733 return -1;
2734 }
2735 self->write = PyObject_GetAttrString(file, "write");
2736 if (self->write == NULL)
2737 return -1;
2738 self->buf_size = 0;
2739 self->write_buf = (char *)PyMem_Malloc(WRITE_BUF_SIZE);
2740 if (self->write_buf == NULL) {
2741 PyErr_NoMemory();
2742 return -1;
2743 }
2744 self->pers_func = NULL;
2745 if (PyObject_HasAttrString((PyObject *)self, "persistent_id")) {
2746 self->pers_func = PyObject_GetAttrString((PyObject *)self,
2747 "persistent_id");
2748 if (self->pers_func == NULL)
2749 return -1;
2750 }
2751 self->memo = PyDict_New();
2752 if (self->memo == NULL)
2753 return -1;
2754
2755 return 0;
2756}
2757
2758static PyObject *
2759Pickler_get_memo(PicklerObject *self)
2760{
2761 if (self->memo == NULL)
2762 PyErr_SetString(PyExc_AttributeError, "memo");
2763 else
2764 Py_INCREF(self->memo);
2765 return self->memo;
2766}
2767
2768static int
2769Pickler_set_memo(PicklerObject *self, PyObject *value)
2770{
2771 PyObject *tmp;
2772
2773 if (value == NULL) {
2774 PyErr_SetString(PyExc_TypeError,
2775 "attribute deletion is not supported");
2776 return -1;
2777 }
2778 if (!PyDict_Check(value)) {
2779 PyErr_SetString(PyExc_TypeError, "memo must be a dictionary");
2780 return -1;
2781 }
2782
2783 tmp = self->memo;
2784 Py_INCREF(value);
2785 self->memo = value;
2786 Py_XDECREF(tmp);
2787
2788 return 0;
2789}
2790
2791static PyObject *
2792Pickler_get_persid(PicklerObject *self)
2793{
2794 if (self->pers_func == NULL)
2795 PyErr_SetString(PyExc_AttributeError, "persistent_id");
2796 else
2797 Py_INCREF(self->pers_func);
2798 return self->pers_func;
2799}
2800
2801static int
2802Pickler_set_persid(PicklerObject *self, PyObject *value)
2803{
2804 PyObject *tmp;
2805
2806 if (value == NULL) {
2807 PyErr_SetString(PyExc_TypeError,
2808 "attribute deletion is not supported");
2809 return -1;
2810 }
2811 if (!PyCallable_Check(value)) {
2812 PyErr_SetString(PyExc_TypeError,
2813 "persistent_id must be a callable taking one argument");
2814 return -1;
2815 }
2816
2817 tmp = self->pers_func;
2818 Py_INCREF(value);
2819 self->pers_func = value;
2820 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
2821
2822 return 0;
2823}
2824
2825static PyMemberDef Pickler_members[] = {
2826 {"bin", T_INT, offsetof(PicklerObject, bin)},
2827 {"fast", T_INT, offsetof(PicklerObject, fast)},
2828 {NULL}
2829};
2830
2831static PyGetSetDef Pickler_getsets[] = {
2832 {"memo", (getter)Pickler_get_memo,
2833 (setter)Pickler_set_memo},
2834 {"persistent_id", (getter)Pickler_get_persid,
2835 (setter)Pickler_set_persid},
2836 {NULL}
2837};
2838
2839static PyTypeObject Pickler_Type = {
2840 PyVarObject_HEAD_INIT(NULL, 0)
2841 "_pickle.Pickler" , /*tp_name*/
2842 sizeof(PicklerObject), /*tp_basicsize*/
2843 0, /*tp_itemsize*/
2844 (destructor)Pickler_dealloc, /*tp_dealloc*/
2845 0, /*tp_print*/
2846 0, /*tp_getattr*/
2847 0, /*tp_setattr*/
Mark Dickinsone94c6792009-02-02 20:36:42 +00002848 0, /*tp_reserved*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002849 0, /*tp_repr*/
2850 0, /*tp_as_number*/
2851 0, /*tp_as_sequence*/
2852 0, /*tp_as_mapping*/
2853 0, /*tp_hash*/
2854 0, /*tp_call*/
2855 0, /*tp_str*/
2856 0, /*tp_getattro*/
2857 0, /*tp_setattro*/
2858 0, /*tp_as_buffer*/
2859 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
2860 Pickler_doc, /*tp_doc*/
2861 (traverseproc)Pickler_traverse, /*tp_traverse*/
2862 (inquiry)Pickler_clear, /*tp_clear*/
2863 0, /*tp_richcompare*/
2864 0, /*tp_weaklistoffset*/
2865 0, /*tp_iter*/
2866 0, /*tp_iternext*/
2867 Pickler_methods, /*tp_methods*/
2868 Pickler_members, /*tp_members*/
2869 Pickler_getsets, /*tp_getset*/
2870 0, /*tp_base*/
2871 0, /*tp_dict*/
2872 0, /*tp_descr_get*/
2873 0, /*tp_descr_set*/
2874 0, /*tp_dictoffset*/
2875 (initproc)Pickler_init, /*tp_init*/
2876 PyType_GenericAlloc, /*tp_alloc*/
2877 PyType_GenericNew, /*tp_new*/
2878 PyObject_GC_Del, /*tp_free*/
2879 0, /*tp_is_gc*/
2880};
2881
2882/* Temporary helper for calling self.find_class().
2883
2884 XXX: It would be nice to able to avoid Python function call overhead, by
2885 using directly the C version of find_class(), when find_class() is not
2886 overridden by a subclass. Although, this could become rather hackish. A
2887 simpler optimization would be to call the C function when self is not a
2888 subclass instance. */
2889static PyObject *
2890find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
2891{
2892 return PyObject_CallMethod((PyObject *)self, "find_class", "OO",
2893 module_name, global_name);
2894}
2895
2896static int
2897marker(UnpicklerObject *self)
2898{
2899 if (self->num_marks < 1) {
2900 PyErr_SetString(UnpicklingError, "could not find MARK");
2901 return -1;
2902 }
2903
2904 return self->marks[--self->num_marks];
2905}
2906
2907static int
2908load_none(UnpicklerObject *self)
2909{
2910 PDATA_APPEND(self->stack, Py_None, -1);
2911 return 0;
2912}
2913
2914static int
2915bad_readline(void)
2916{
2917 PyErr_SetString(UnpicklingError, "pickle data was truncated");
2918 return -1;
2919}
2920
2921static int
2922load_int(UnpicklerObject *self)
2923{
2924 PyObject *value;
2925 char *endptr, *s;
2926 Py_ssize_t len;
2927 long x;
2928
2929 if ((len = unpickler_readline(self, &s)) < 0)
2930 return -1;
2931 if (len < 2)
2932 return bad_readline();
2933
2934 errno = 0;
2935 /* XXX: Should the base argument of strtol() be explicitly set to 10? */
2936 x = strtol(s, &endptr, 0);
2937
2938 if (errno || (*endptr != '\n') || (endptr[1] != '\0')) {
2939 /* Hm, maybe we've got something long. Let's try reading
2940 * it as a Python long object. */
2941 errno = 0;
2942 /* XXX: Same thing about the base here. */
2943 value = PyLong_FromString(s, NULL, 0);
2944 if (value == NULL) {
2945 PyErr_SetString(PyExc_ValueError,
2946 "could not convert string to int");
2947 return -1;
2948 }
2949 }
2950 else {
2951 if (len == 3 && (x == 0 || x == 1)) {
2952 if ((value = PyBool_FromLong(x)) == NULL)
2953 return -1;
2954 }
2955 else {
2956 if ((value = PyLong_FromLong(x)) == NULL)
2957 return -1;
2958 }
2959 }
2960
2961 PDATA_PUSH(self->stack, value, -1);
2962 return 0;
2963}
2964
2965static int
2966load_bool(UnpicklerObject *self, PyObject *boolean)
2967{
2968 assert(boolean == Py_True || boolean == Py_False);
2969 PDATA_APPEND(self->stack, boolean, -1);
2970 return 0;
2971}
2972
/* bytes holds 'size' bytes of a little-endian integer.  Return its value
 * as a C long.  Obscure:  when size is 1 or 2, this is an unsigned
 * little-endian int, but when size is 4 it's a signed one.  This is an
 * historical source of x-platform bugs.
 *
 * The value is assembled in an unsigned long so that no shift ever
 * reaches the sign bit of a signed type, which would be undefined
 * behavior on platforms where long is 32 bits wide.
 */
static long
calc_binint(char *bytes, int size)
{
    const unsigned char *s = (const unsigned char *)bytes;
    unsigned long x = 0;
    int i;

    for (i = 0; i < size; i++) {
        x |= (unsigned long)s[i] << (i * 8);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4)
     * is signed: bit 31 is the sign bit.  Compute the negative value
     * arithmetically so the result is the same whatever the width of
     * long, without relying on implementation-defined conversions.
     */
    if (size == 4 && (x & 0x80000000UL) != 0) {
        return (long)(x & 0x7fffffffUL) - 0x7fffffffL - 1L;
    }

    return (long)x;
}
2999
3000static int
3001load_binintx(UnpicklerObject *self, char *s, int size)
3002{
3003 PyObject *value;
3004 long x;
3005
3006 x = calc_binint(s, size);
3007
3008 if ((value = PyLong_FromLong(x)) == NULL)
3009 return -1;
3010
3011 PDATA_PUSH(self->stack, value, -1);
3012 return 0;
3013}
3014
3015static int
3016load_binint(UnpicklerObject *self)
3017{
3018 char *s;
3019
3020 if (unpickler_read(self, &s, 4) < 0)
3021 return -1;
3022
3023 return load_binintx(self, s, 4);
3024}
3025
3026static int
3027load_binint1(UnpicklerObject *self)
3028{
3029 char *s;
3030
3031 if (unpickler_read(self, &s, 1) < 0)
3032 return -1;
3033
3034 return load_binintx(self, s, 1);
3035}
3036
3037static int
3038load_binint2(UnpicklerObject *self)
3039{
3040 char *s;
3041
3042 if (unpickler_read(self, &s, 2) < 0)
3043 return -1;
3044
3045 return load_binintx(self, s, 2);
3046}
3047
3048static int
3049load_long(UnpicklerObject *self)
3050{
3051 PyObject *value;
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00003052 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003053 Py_ssize_t len;
3054
3055 if ((len = unpickler_readline(self, &s)) < 0)
3056 return -1;
3057 if (len < 2)
3058 return bad_readline();
3059
Mark Dickinson8dd05142009-01-20 20:43:58 +00003060 /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
3061 the 'L' before calling PyLong_FromString. In order to maintain
3062 compatibility with Python 3.0.0, we don't actually *require*
3063 the 'L' to be present. */
3064 if (s[len-2] == 'L') {
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00003065 s[len-2] = '\0';
Mark Dickinson8dd05142009-01-20 20:43:58 +00003066 }
Alexandre Vassalottie4bccb72009-01-24 01:47:57 +00003067 /* XXX: Should the base argument explicitly set to 10? */
3068 value = PyLong_FromString(s, NULL, 0);
Mark Dickinson8dd05142009-01-20 20:43:58 +00003069 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003070 return -1;
3071
3072 PDATA_PUSH(self->stack, value, -1);
3073 return 0;
3074}
3075
3076/* 'size' bytes contain the # of bytes of little-endian 256's-complement
3077 * data following.
3078 */
3079static int
3080load_counted_long(UnpicklerObject *self, int size)
3081{
3082 PyObject *value;
3083 char *nbytes;
3084 char *pdata;
3085
3086 assert(size == 1 || size == 4);
3087 if (unpickler_read(self, &nbytes, size) < 0)
3088 return -1;
3089
3090 size = calc_binint(nbytes, size);
3091 if (size < 0) {
3092 /* Corrupt or hostile pickle -- we never write one like this */
3093 PyErr_SetString(UnpicklingError,
3094 "LONG pickle has negative byte count");
3095 return -1;
3096 }
3097
3098 if (size == 0)
3099 value = PyLong_FromLong(0L);
3100 else {
3101 /* Read the raw little-endian bytes and convert. */
3102 if (unpickler_read(self, &pdata, size) < 0)
3103 return -1;
3104 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
3105 1 /* little endian */ , 1 /* signed */ );
3106 }
3107 if (value == NULL)
3108 return -1;
3109 PDATA_PUSH(self->stack, value, -1);
3110 return 0;
3111}
3112
3113static int
3114load_float(UnpicklerObject *self)
3115{
3116 PyObject *value;
3117 char *endptr, *s;
3118 Py_ssize_t len;
3119 double d;
3120
3121 if ((len = unpickler_readline(self, &s)) < 0)
3122 return -1;
3123 if (len < 2)
3124 return bad_readline();
3125
3126 errno = 0;
Mark Dickinson725bfd82009-05-03 20:33:40 +00003127 d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
3128 if (d == -1.0 && PyErr_Occurred())
3129 return -1;
3130 if ((endptr[0] != '\n') || (endptr[1] != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003131 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
3132 return -1;
3133 }
Mark Dickinson725bfd82009-05-03 20:33:40 +00003134 value = PyFloat_FromDouble(d);
3135 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003136 return -1;
3137
3138 PDATA_PUSH(self->stack, value, -1);
3139 return 0;
Mark Dickinson725bfd82009-05-03 20:33:40 +00003140 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003141
3142static int
3143load_binfloat(UnpicklerObject *self)
3144{
3145 PyObject *value;
3146 double x;
3147 char *s;
3148
3149 if (unpickler_read(self, &s, 8) < 0)
3150 return -1;
3151
3152 x = _PyFloat_Unpack8((unsigned char *)s, 0);
3153 if (x == -1.0 && PyErr_Occurred())
3154 return -1;
3155
3156 if ((value = PyFloat_FromDouble(x)) == NULL)
3157 return -1;
3158
3159 PDATA_PUSH(self->stack, value, -1);
3160 return 0;
3161}
3162
3163static int
3164load_string(UnpicklerObject *self)
3165{
3166 PyObject *bytes;
3167 PyObject *str = NULL;
3168 Py_ssize_t len;
3169 char *s, *p;
3170
3171 if ((len = unpickler_readline(self, &s)) < 0)
3172 return -1;
3173 if (len < 3)
3174 return bad_readline();
3175 if ((s = strdup(s)) == NULL) {
3176 PyErr_NoMemory();
3177 return -1;
3178 }
3179
3180 /* Strip outermost quotes */
3181 while (s[len - 1] <= ' ')
3182 len--;
3183 if (s[0] == '"' && s[len - 1] == '"') {
3184 s[len - 1] = '\0';
3185 p = s + 1;
3186 len -= 2;
3187 }
3188 else if (s[0] == '\'' && s[len - 1] == '\'') {
3189 s[len - 1] = '\0';
3190 p = s + 1;
3191 len -= 2;
3192 }
3193 else {
3194 free(s);
3195 PyErr_SetString(PyExc_ValueError, "insecure string pickle");
3196 return -1;
3197 }
3198
3199 /* Use the PyBytes API to decode the string, since that is what is used
3200 to encode, and then coerce the result to Unicode. */
3201 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
3202 free(s);
3203 if (bytes == NULL)
3204 return -1;
3205 str = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
3206 Py_DECREF(bytes);
3207 if (str == NULL)
3208 return -1;
3209
3210 PDATA_PUSH(self->stack, str, -1);
3211 return 0;
3212}
3213
3214static int
3215load_binbytes(UnpicklerObject *self)
3216{
3217 PyObject *bytes;
3218 long x;
3219 char *s;
3220
3221 if (unpickler_read(self, &s, 4) < 0)
3222 return -1;
3223
3224 x = calc_binint(s, 4);
3225 if (x < 0) {
3226 PyErr_SetString(UnpicklingError,
3227 "BINBYTES pickle has negative byte count");
3228 return -1;
3229 }
3230
3231 if (unpickler_read(self, &s, x) < 0)
3232 return -1;
3233 bytes = PyBytes_FromStringAndSize(s, x);
3234 if (bytes == NULL)
3235 return -1;
3236
3237 PDATA_PUSH(self->stack, bytes, -1);
3238 return 0;
3239}
3240
3241static int
3242load_short_binbytes(UnpicklerObject *self)
3243{
3244 PyObject *bytes;
3245 unsigned char x;
3246 char *s;
3247
3248 if (unpickler_read(self, &s, 1) < 0)
3249 return -1;
3250
3251 x = (unsigned char)s[0];
3252
3253 if (unpickler_read(self, &s, x) < 0)
3254 return -1;
3255
3256 bytes = PyBytes_FromStringAndSize(s, x);
3257 if (bytes == NULL)
3258 return -1;
3259
3260 PDATA_PUSH(self->stack, bytes, -1);
3261 return 0;
3262}
3263
3264static int
3265load_binstring(UnpicklerObject *self)
3266{
3267 PyObject *str;
3268 long x;
3269 char *s;
3270
3271 if (unpickler_read(self, &s, 4) < 0)
3272 return -1;
3273
3274 x = calc_binint(s, 4);
3275 if (x < 0) {
3276 PyErr_SetString(UnpicklingError,
3277 "BINSTRING pickle has negative byte count");
3278 return -1;
3279 }
3280
3281 if (unpickler_read(self, &s, x) < 0)
3282 return -1;
3283
3284 /* Convert Python 2.x strings to unicode. */
3285 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
3286 if (str == NULL)
3287 return -1;
3288
3289 PDATA_PUSH(self->stack, str, -1);
3290 return 0;
3291}
3292
3293static int
3294load_short_binstring(UnpicklerObject *self)
3295{
3296 PyObject *str;
3297 unsigned char x;
3298 char *s;
3299
3300 if (unpickler_read(self, &s, 1) < 0)
3301 return -1;
3302
3303 x = (unsigned char)s[0];
3304
3305 if (unpickler_read(self, &s, x) < 0)
3306 return -1;
3307
3308 /* Convert Python 2.x strings to unicode. */
3309 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
3310 if (str == NULL)
3311 return -1;
3312
3313 PDATA_PUSH(self->stack, str, -1);
3314 return 0;
3315}
3316
3317static int
3318load_unicode(UnpicklerObject *self)
3319{
3320 PyObject *str;
3321 Py_ssize_t len;
3322 char *s;
3323
3324 if ((len = unpickler_readline(self, &s)) < 0)
3325 return -1;
3326 if (len < 1)
3327 return bad_readline();
3328
3329 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
3330 if (str == NULL)
3331 return -1;
3332
3333 PDATA_PUSH(self->stack, str, -1);
3334 return 0;
3335}
3336
3337static int
3338load_binunicode(UnpicklerObject *self)
3339{
3340 PyObject *str;
3341 long size;
3342 char *s;
3343
3344 if (unpickler_read(self, &s, 4) < 0)
3345 return -1;
3346
3347 size = calc_binint(s, 4);
3348 if (size < 0) {
3349 PyErr_SetString(UnpicklingError,
3350 "BINUNICODE pickle has negative byte count");
3351 return -1;
3352 }
3353
3354 if (unpickler_read(self, &s, size) < 0)
3355 return -1;
3356
Victor Stinner485fb562010-04-13 11:07:24 +00003357 str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003358 if (str == NULL)
3359 return -1;
3360
3361 PDATA_PUSH(self->stack, str, -1);
3362 return 0;
3363}
3364
3365static int
3366load_tuple(UnpicklerObject *self)
3367{
3368 PyObject *tuple;
3369 int i;
3370
3371 if ((i = marker(self)) < 0)
3372 return -1;
3373
3374 tuple = Pdata_poptuple(self->stack, i);
3375 if (tuple == NULL)
3376 return -1;
3377 PDATA_PUSH(self->stack, tuple, -1);
3378 return 0;
3379}
3380
3381static int
3382load_counted_tuple(UnpicklerObject *self, int len)
3383{
3384 PyObject *tuple;
3385
3386 tuple = PyTuple_New(len);
3387 if (tuple == NULL)
3388 return -1;
3389
3390 while (--len >= 0) {
3391 PyObject *item;
3392
3393 PDATA_POP(self->stack, item);
3394 if (item == NULL)
3395 return -1;
3396 PyTuple_SET_ITEM(tuple, len, item);
3397 }
3398 PDATA_PUSH(self->stack, tuple, -1);
3399 return 0;
3400}
3401
3402static int
3403load_empty_list(UnpicklerObject *self)
3404{
3405 PyObject *list;
3406
3407 if ((list = PyList_New(0)) == NULL)
3408 return -1;
3409 PDATA_PUSH(self->stack, list, -1);
3410 return 0;
3411}
3412
3413static int
3414load_empty_dict(UnpicklerObject *self)
3415{
3416 PyObject *dict;
3417
3418 if ((dict = PyDict_New()) == NULL)
3419 return -1;
3420 PDATA_PUSH(self->stack, dict, -1);
3421 return 0;
3422}
3423
3424static int
3425load_list(UnpicklerObject *self)
3426{
3427 PyObject *list;
3428 int i;
3429
3430 if ((i = marker(self)) < 0)
3431 return -1;
3432
3433 list = Pdata_poplist(self->stack, i);
3434 if (list == NULL)
3435 return -1;
3436 PDATA_PUSH(self->stack, list, -1);
3437 return 0;
3438}
3439
3440static int
3441load_dict(UnpicklerObject *self)
3442{
3443 PyObject *dict, *key, *value;
3444 int i, j, k;
3445
3446 if ((i = marker(self)) < 0)
3447 return -1;
3448 j = self->stack->length;
3449
3450 if ((dict = PyDict_New()) == NULL)
3451 return -1;
3452
3453 for (k = i + 1; k < j; k += 2) {
3454 key = self->stack->data[k - 1];
3455 value = self->stack->data[k];
3456 if (PyDict_SetItem(dict, key, value) < 0) {
3457 Py_DECREF(dict);
3458 return -1;
3459 }
3460 }
3461 Pdata_clear(self->stack, i);
3462 PDATA_PUSH(self->stack, dict, -1);
3463 return 0;
3464}
3465
3466static PyObject *
3467instantiate(PyObject *cls, PyObject *args)
3468{
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00003469 PyObject *result = NULL;
3470 /* Caller must assure args are a tuple. Normally, args come from
3471 Pdata_poptuple which packs objects from the top of the stack
3472 into a newly created tuple. */
3473 assert(PyTuple_Check(args));
3474 if (Py_SIZE(args) > 0 || !PyType_Check(cls) ||
3475 PyObject_HasAttrString(cls, "__getinitargs__")) {
3476 result = PyObject_CallObject(cls, args);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003477 }
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00003478 else {
3479 result = PyObject_CallMethod(cls, "__new__", "O", cls);
3480 }
3481 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003482}
3483
3484static int
3485load_obj(UnpicklerObject *self)
3486{
3487 PyObject *cls, *args, *obj = NULL;
3488 int i;
3489
3490 if ((i = marker(self)) < 0)
3491 return -1;
3492
3493 args = Pdata_poptuple(self->stack, i + 1);
3494 if (args == NULL)
3495 return -1;
3496
3497 PDATA_POP(self->stack, cls);
3498 if (cls) {
3499 obj = instantiate(cls, args);
3500 Py_DECREF(cls);
3501 }
3502 Py_DECREF(args);
3503 if (obj == NULL)
3504 return -1;
3505
3506 PDATA_PUSH(self->stack, obj, -1);
3507 return 0;
3508}
3509
3510static int
3511load_inst(UnpicklerObject *self)
3512{
3513 PyObject *cls = NULL;
3514 PyObject *args = NULL;
3515 PyObject *obj = NULL;
3516 PyObject *module_name;
3517 PyObject *class_name;
3518 Py_ssize_t len;
3519 int i;
3520 char *s;
3521
3522 if ((i = marker(self)) < 0)
3523 return -1;
3524 if ((len = unpickler_readline(self, &s)) < 0)
3525 return -1;
3526 if (len < 2)
3527 return bad_readline();
3528
3529 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
3530 identifiers are permitted in Python 3.0, since the INST opcode is only
3531 supported by older protocols on Python 2.x. */
3532 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
3533 if (module_name == NULL)
3534 return -1;
3535
3536 if ((len = unpickler_readline(self, &s)) >= 0) {
3537 if (len < 2)
3538 return bad_readline();
3539 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00003540 if (class_name != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003541 cls = find_class(self, module_name, class_name);
3542 Py_DECREF(class_name);
3543 }
3544 }
3545 Py_DECREF(module_name);
3546
3547 if (cls == NULL)
3548 return -1;
3549
3550 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
3551 obj = instantiate(cls, args);
3552 Py_DECREF(args);
3553 }
3554 Py_DECREF(cls);
3555
3556 if (obj == NULL)
3557 return -1;
3558
3559 PDATA_PUSH(self->stack, obj, -1);
3560 return 0;
3561}
3562
3563static int
3564load_newobj(UnpicklerObject *self)
3565{
3566 PyObject *args = NULL;
3567 PyObject *clsraw = NULL;
3568 PyTypeObject *cls; /* clsraw cast to its true type */
3569 PyObject *obj;
3570
3571 /* Stack is ... cls argtuple, and we want to call
3572 * cls.__new__(cls, *argtuple).
3573 */
3574 PDATA_POP(self->stack, args);
3575 if (args == NULL)
3576 goto error;
3577 if (!PyTuple_Check(args)) {
3578 PyErr_SetString(UnpicklingError, "NEWOBJ expected an arg " "tuple.");
3579 goto error;
3580 }
3581
3582 PDATA_POP(self->stack, clsraw);
3583 cls = (PyTypeObject *)clsraw;
3584 if (cls == NULL)
3585 goto error;
3586 if (!PyType_Check(cls)) {
3587 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
3588 "isn't a type object");
3589 goto error;
3590 }
3591 if (cls->tp_new == NULL) {
3592 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
3593 "has NULL tp_new");
3594 goto error;
3595 }
3596
3597 /* Call __new__. */
3598 obj = cls->tp_new(cls, args, NULL);
3599 if (obj == NULL)
3600 goto error;
3601
3602 Py_DECREF(args);
3603 Py_DECREF(clsraw);
3604 PDATA_PUSH(self->stack, obj, -1);
3605 return 0;
3606
3607 error:
3608 Py_XDECREF(args);
3609 Py_XDECREF(clsraw);
3610 return -1;
3611}
3612
3613static int
3614load_global(UnpicklerObject *self)
3615{
3616 PyObject *global = NULL;
3617 PyObject *module_name;
3618 PyObject *global_name;
3619 Py_ssize_t len;
3620 char *s;
3621
3622 if ((len = unpickler_readline(self, &s)) < 0)
3623 return -1;
3624 if (len < 2)
3625 return bad_readline();
3626 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
3627 if (!module_name)
3628 return -1;
3629
3630 if ((len = unpickler_readline(self, &s)) >= 0) {
3631 if (len < 2) {
3632 Py_DECREF(module_name);
3633 return bad_readline();
3634 }
3635 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
3636 if (global_name) {
3637 global = find_class(self, module_name, global_name);
3638 Py_DECREF(global_name);
3639 }
3640 }
3641 Py_DECREF(module_name);
3642
3643 if (global == NULL)
3644 return -1;
3645 PDATA_PUSH(self->stack, global, -1);
3646 return 0;
3647}
3648
3649static int
3650load_persid(UnpicklerObject *self)
3651{
3652 PyObject *pid;
3653 Py_ssize_t len;
3654 char *s;
3655
3656 if (self->pers_func) {
3657 if ((len = unpickler_readline(self, &s)) < 0)
3658 return -1;
3659 if (len < 2)
3660 return bad_readline();
3661
3662 pid = PyBytes_FromStringAndSize(s, len - 1);
3663 if (pid == NULL)
3664 return -1;
3665
3666 /* Ugh... this does not leak since unpickler_call() steals the
3667 reference to pid first. */
3668 pid = unpickler_call(self, self->pers_func, pid);
3669 if (pid == NULL)
3670 return -1;
3671
3672 PDATA_PUSH(self->stack, pid, -1);
3673 return 0;
3674 }
3675 else {
3676 PyErr_SetString(UnpicklingError,
3677 "A load persistent id instruction was encountered,\n"
3678 "but no persistent_load function was specified.");
3679 return -1;
3680 }
3681}
3682
3683static int
3684load_binpersid(UnpicklerObject *self)
3685{
3686 PyObject *pid;
3687
3688 if (self->pers_func) {
3689 PDATA_POP(self->stack, pid);
3690 if (pid == NULL)
3691 return -1;
3692
3693 /* Ugh... this does not leak since unpickler_call() steals the
3694 reference to pid first. */
3695 pid = unpickler_call(self, self->pers_func, pid);
3696 if (pid == NULL)
3697 return -1;
3698
3699 PDATA_PUSH(self->stack, pid, -1);
3700 return 0;
3701 }
3702 else {
3703 PyErr_SetString(UnpicklingError,
3704 "A load persistent id instruction was encountered,\n"
3705 "but no persistent_load function was specified.");
3706 return -1;
3707 }
3708}
3709
3710static int
3711load_pop(UnpicklerObject *self)
3712{
Collin Winter8ca69de2009-05-26 16:53:41 +00003713 int len = self->stack->length;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003714
3715 /* Note that we split the (pickle.py) stack into two stacks,
3716 * an object stack and a mark stack. We have to be clever and
3717 * pop the right one. We do this by looking at the top of the
Collin Winter8ca69de2009-05-26 16:53:41 +00003718 * mark stack first, and only signalling a stack underflow if
3719 * the object stack is empty and the mark stack doesn't match
3720 * our expectations.
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003721 */
Collin Winter8ca69de2009-05-26 16:53:41 +00003722 if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003723 self->num_marks--;
Antoine Pitrou01a15ea2010-01-07 17:57:31 +00003724 } else if (len > 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003725 len--;
3726 Py_DECREF(self->stack->data[len]);
3727 self->stack->length = len;
Collin Winter8ca69de2009-05-26 16:53:41 +00003728 } else {
3729 return stack_underflow();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003730 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003731 return 0;
3732}
3733
3734static int
3735load_pop_mark(UnpicklerObject *self)
3736{
3737 int i;
3738
3739 if ((i = marker(self)) < 0)
3740 return -1;
3741
3742 Pdata_clear(self->stack, i);
3743
3744 return 0;
3745}
3746
3747static int
3748load_dup(UnpicklerObject *self)
3749{
3750 PyObject *last;
3751 int len;
3752
3753 if ((len = self->stack->length) <= 0)
3754 return stack_underflow();
3755 last = self->stack->data[len - 1];
3756 PDATA_APPEND(self->stack, last, -1);
3757 return 0;
3758}
3759
3760static int
3761load_get(UnpicklerObject *self)
3762{
3763 PyObject *key, *value;
3764 Py_ssize_t len;
3765 char *s;
3766
3767 if ((len = unpickler_readline(self, &s)) < 0)
3768 return -1;
3769 if (len < 2)
3770 return bad_readline();
3771
3772 key = PyLong_FromString(s, NULL, 10);
3773 if (key == NULL)
3774 return -1;
3775
3776 value = PyDict_GetItemWithError(self->memo, key);
3777 if (value == NULL) {
3778 if (!PyErr_Occurred())
3779 PyErr_SetObject(PyExc_KeyError, key);
3780 Py_DECREF(key);
3781 return -1;
3782 }
3783 Py_DECREF(key);
3784
3785 PDATA_APPEND(self->stack, value, -1);
3786 return 0;
3787}
3788
3789static int
3790load_binget(UnpicklerObject *self)
3791{
3792 PyObject *key, *value;
3793 char *s;
3794
3795 if (unpickler_read(self, &s, 1) < 0)
3796 return -1;
3797
3798 /* Here, the unsigned cast is necessary to avoid negative values. */
3799 key = PyLong_FromLong((long)(unsigned char)s[0]);
3800 if (key == NULL)
3801 return -1;
3802
3803 value = PyDict_GetItemWithError(self->memo, key);
3804 if (value == NULL) {
3805 if (!PyErr_Occurred())
3806 PyErr_SetObject(PyExc_KeyError, key);
3807 Py_DECREF(key);
3808 return -1;
3809 }
3810 Py_DECREF(key);
3811
3812 PDATA_APPEND(self->stack, value, -1);
3813 return 0;
3814}
3815
3816static int
3817load_long_binget(UnpicklerObject *self)
3818{
3819 PyObject *key, *value;
3820 char *s;
3821 long k;
3822
3823 if (unpickler_read(self, &s, 4) < 0)
3824 return -1;
3825
3826 k = (long)(unsigned char)s[0];
3827 k |= (long)(unsigned char)s[1] << 8;
3828 k |= (long)(unsigned char)s[2] << 16;
3829 k |= (long)(unsigned char)s[3] << 24;
3830
3831 key = PyLong_FromLong(k);
3832 if (key == NULL)
3833 return -1;
3834
3835 value = PyDict_GetItemWithError(self->memo, key);
3836 if (value == NULL) {
3837 if (!PyErr_Occurred())
3838 PyErr_SetObject(PyExc_KeyError, key);
3839 Py_DECREF(key);
3840 return -1;
3841 }
3842 Py_DECREF(key);
3843
3844 PDATA_APPEND(self->stack, value, -1);
3845 return 0;
3846}
3847
3848/* Push an object from the extension registry (EXT[124]). nbytes is
3849 * the number of bytes following the opcode, holding the index (code) value.
3850 */
3851static int
3852load_extension(UnpicklerObject *self, int nbytes)
3853{
3854 char *codebytes; /* the nbytes bytes after the opcode */
3855 long code; /* calc_binint returns long */
3856 PyObject *py_code; /* code as a Python int */
3857 PyObject *obj; /* the object to push */
3858 PyObject *pair; /* (module_name, class_name) */
3859 PyObject *module_name, *class_name;
3860
3861 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
3862 if (unpickler_read(self, &codebytes, nbytes) < 0)
3863 return -1;
3864 code = calc_binint(codebytes, nbytes);
3865 if (code <= 0) { /* note that 0 is forbidden */
3866 /* Corrupt or hostile pickle. */
3867 PyErr_SetString(UnpicklingError, "EXT specifies code <= 0");
3868 return -1;
3869 }
3870
3871 /* Look for the code in the cache. */
3872 py_code = PyLong_FromLong(code);
3873 if (py_code == NULL)
3874 return -1;
3875 obj = PyDict_GetItem(extension_cache, py_code);
3876 if (obj != NULL) {
3877 /* Bingo. */
3878 Py_DECREF(py_code);
3879 PDATA_APPEND(self->stack, obj, -1);
3880 return 0;
3881 }
3882
3883 /* Look up the (module_name, class_name) pair. */
3884 pair = PyDict_GetItem(inverted_registry, py_code);
3885 if (pair == NULL) {
3886 Py_DECREF(py_code);
3887 PyErr_Format(PyExc_ValueError, "unregistered extension "
3888 "code %ld", code);
3889 return -1;
3890 }
3891 /* Since the extension registry is manipulable via Python code,
3892 * confirm that pair is really a 2-tuple of strings.
3893 */
3894 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2 ||
3895 !PyUnicode_Check(module_name = PyTuple_GET_ITEM(pair, 0)) ||
3896 !PyUnicode_Check(class_name = PyTuple_GET_ITEM(pair, 1))) {
3897 Py_DECREF(py_code);
3898 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
3899 "isn't a 2-tuple of strings", code);
3900 return -1;
3901 }
3902 /* Load the object. */
3903 obj = find_class(self, module_name, class_name);
3904 if (obj == NULL) {
3905 Py_DECREF(py_code);
3906 return -1;
3907 }
3908 /* Cache code -> obj. */
3909 code = PyDict_SetItem(extension_cache, py_code, obj);
3910 Py_DECREF(py_code);
3911 if (code < 0) {
3912 Py_DECREF(obj);
3913 return -1;
3914 }
3915 PDATA_PUSH(self->stack, obj, -1);
3916 return 0;
3917}
3918
3919static int
3920load_put(UnpicklerObject *self)
3921{
3922 PyObject *key, *value;
3923 Py_ssize_t len;
3924 char *s;
3925 int x;
3926
3927 if ((len = unpickler_readline(self, &s)) < 0)
3928 return -1;
3929 if (len < 2)
3930 return bad_readline();
3931 if ((x = self->stack->length) <= 0)
3932 return stack_underflow();
3933
3934 key = PyLong_FromString(s, NULL, 10);
3935 if (key == NULL)
3936 return -1;
3937 value = self->stack->data[x - 1];
3938
3939 x = PyDict_SetItem(self->memo, key, value);
3940 Py_DECREF(key);
3941 return x;
3942}
3943
3944static int
3945load_binput(UnpicklerObject *self)
3946{
3947 PyObject *key, *value;
3948 char *s;
3949 int x;
3950
3951 if (unpickler_read(self, &s, 1) < 0)
3952 return -1;
3953 if ((x = self->stack->length) <= 0)
3954 return stack_underflow();
3955
3956 key = PyLong_FromLong((long)(unsigned char)s[0]);
3957 if (key == NULL)
3958 return -1;
3959 value = self->stack->data[x - 1];
3960
3961 x = PyDict_SetItem(self->memo, key, value);
3962 Py_DECREF(key);
3963 return x;
3964}
3965
3966static int
3967load_long_binput(UnpicklerObject *self)
3968{
3969 PyObject *key, *value;
3970 long k;
3971 char *s;
3972 int x;
3973
3974 if (unpickler_read(self, &s, 4) < 0)
3975 return -1;
3976 if ((x = self->stack->length) <= 0)
3977 return stack_underflow();
3978
3979 k = (long)(unsigned char)s[0];
3980 k |= (long)(unsigned char)s[1] << 8;
3981 k |= (long)(unsigned char)s[2] << 16;
3982 k |= (long)(unsigned char)s[3] << 24;
3983
3984 key = PyLong_FromLong(k);
3985 if (key == NULL)
3986 return -1;
3987 value = self->stack->data[x - 1];
3988
3989 x = PyDict_SetItem(self->memo, key, value);
3990 Py_DECREF(key);
3991 return x;
3992}
3993
3994static int
3995do_append(UnpicklerObject *self, int x)
3996{
3997 PyObject *value;
3998 PyObject *list;
3999 int len, i;
4000
4001 len = self->stack->length;
4002 if (x > len || x <= 0)
4003 return stack_underflow();
4004 if (len == x) /* nothing to do */
4005 return 0;
4006
4007 list = self->stack->data[x - 1];
4008
4009 if (PyList_Check(list)) {
4010 PyObject *slice;
4011 Py_ssize_t list_len;
4012
4013 slice = Pdata_poplist(self->stack, x);
4014 if (!slice)
4015 return -1;
4016 list_len = PyList_GET_SIZE(list);
4017 i = PyList_SetSlice(list, list_len, list_len, slice);
4018 Py_DECREF(slice);
4019 return i;
4020 }
4021 else {
4022 PyObject *append_func;
4023
4024 append_func = PyObject_GetAttrString(list, "append");
4025 if (append_func == NULL)
4026 return -1;
4027 for (i = x; i < len; i++) {
4028 PyObject *result;
4029
4030 value = self->stack->data[i];
4031 result = unpickler_call(self, append_func, value);
4032 if (result == NULL) {
4033 Pdata_clear(self->stack, i + 1);
4034 self->stack->length = x;
4035 return -1;
4036 }
4037 Py_DECREF(result);
4038 }
4039 self->stack->length = x;
4040 }
4041
4042 return 0;
4043}
4044
4045static int
4046load_append(UnpicklerObject *self)
4047{
4048 return do_append(self, self->stack->length - 1);
4049}
4050
4051static int
4052load_appends(UnpicklerObject *self)
4053{
4054 return do_append(self, marker(self));
4055}
4056
4057static int
4058do_setitems(UnpicklerObject *self, int x)
4059{
4060 PyObject *value, *key;
4061 PyObject *dict;
4062 int len, i;
4063 int status = 0;
4064
4065 len = self->stack->length;
4066 if (x > len || x <= 0)
4067 return stack_underflow();
4068 if (len == x) /* nothing to do */
4069 return 0;
4070 if ((len - x) % 2 != 0) {
4071 /* Currupt or hostile pickle -- we never write one like this. */
4072 PyErr_SetString(UnpicklingError, "odd number of items for SETITEMS");
4073 return -1;
4074 }
4075
4076 /* Here, dict does not actually need to be a PyDict; it could be anything
4077 that supports the __setitem__ attribute. */
4078 dict = self->stack->data[x - 1];
4079
4080 for (i = x + 1; i < len; i += 2) {
4081 key = self->stack->data[i - 1];
4082 value = self->stack->data[i];
4083 if (PyObject_SetItem(dict, key, value) < 0) {
4084 status = -1;
4085 break;
4086 }
4087 }
4088
4089 Pdata_clear(self->stack, x);
4090 return status;
4091}
4092
4093static int
4094load_setitem(UnpicklerObject *self)
4095{
4096 return do_setitems(self, self->stack->length - 2);
4097}
4098
4099static int
4100load_setitems(UnpicklerObject *self)
4101{
4102 return do_setitems(self, marker(self));
4103}
4104
4105static int
4106load_build(UnpicklerObject *self)
4107{
4108 PyObject *state, *inst, *slotstate;
4109 PyObject *setstate;
4110 int status = 0;
4111
4112 /* Stack is ... instance, state. We want to leave instance at
4113 * the stack top, possibly mutated via instance.__setstate__(state).
4114 */
4115 if (self->stack->length < 2)
4116 return stack_underflow();
4117
4118 PDATA_POP(self->stack, state);
4119 if (state == NULL)
4120 return -1;
4121
4122 inst = self->stack->data[self->stack->length - 1];
4123
4124 setstate = PyObject_GetAttrString(inst, "__setstate__");
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00004125 if (setstate == NULL) {
4126 if (PyErr_ExceptionMatches(PyExc_AttributeError))
4127 PyErr_Clear();
Antoine Pitroud79dc622008-09-05 00:03:33 +00004128 else {
4129 Py_DECREF(state);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00004130 return -1;
Antoine Pitroud79dc622008-09-05 00:03:33 +00004131 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004132 }
4133 else {
4134 PyObject *result;
4135
4136 /* The explicit __setstate__ is responsible for everything. */
Antoine Pitroud79dc622008-09-05 00:03:33 +00004137 /* Ugh... this does not leak since unpickler_call() steals the
4138 reference to state first. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004139 result = unpickler_call(self, setstate, state);
4140 Py_DECREF(setstate);
4141 if (result == NULL)
4142 return -1;
4143 Py_DECREF(result);
4144 return 0;
4145 }
4146
4147 /* A default __setstate__. First see whether state embeds a
4148 * slot state dict too (a proto 2 addition).
4149 */
4150 if (PyTuple_Check(state) && Py_SIZE(state) == 2) {
4151 PyObject *tmp = state;
4152
4153 state = PyTuple_GET_ITEM(tmp, 0);
4154 slotstate = PyTuple_GET_ITEM(tmp, 1);
4155 Py_INCREF(state);
4156 Py_INCREF(slotstate);
4157 Py_DECREF(tmp);
4158 }
4159 else
4160 slotstate = NULL;
4161
4162 /* Set inst.__dict__ from the state dict (if any). */
4163 if (state != Py_None) {
4164 PyObject *dict;
Antoine Pitroua9f48a02009-05-02 21:41:14 +00004165 PyObject *d_key, *d_value;
4166 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004167
4168 if (!PyDict_Check(state)) {
4169 PyErr_SetString(UnpicklingError, "state is not a dictionary");
4170 goto error;
4171 }
4172 dict = PyObject_GetAttrString(inst, "__dict__");
4173 if (dict == NULL)
4174 goto error;
4175
Antoine Pitroua9f48a02009-05-02 21:41:14 +00004176 i = 0;
4177 while (PyDict_Next(state, &i, &d_key, &d_value)) {
4178 /* normally the keys for instance attributes are
4179 interned. we should try to do that here. */
4180 Py_INCREF(d_key);
4181 if (PyUnicode_CheckExact(d_key))
4182 PyUnicode_InternInPlace(&d_key);
4183 if (PyObject_SetItem(dict, d_key, d_value) < 0) {
4184 Py_DECREF(d_key);
4185 goto error;
4186 }
4187 Py_DECREF(d_key);
4188 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004189 Py_DECREF(dict);
4190 }
4191
4192 /* Also set instance attributes from the slotstate dict (if any). */
4193 if (slotstate != NULL) {
4194 PyObject *d_key, *d_value;
4195 Py_ssize_t i;
4196
4197 if (!PyDict_Check(slotstate)) {
4198 PyErr_SetString(UnpicklingError,
4199 "slot state is not a dictionary");
4200 goto error;
4201 }
4202 i = 0;
4203 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
4204 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
4205 goto error;
4206 }
4207 }
4208
4209 if (0) {
4210 error:
4211 status = -1;
4212 }
4213
4214 Py_DECREF(state);
4215 Py_XDECREF(slotstate);
4216 return status;
4217}
4218
4219static int
4220load_mark(UnpicklerObject *self)
4221{
4222
4223 /* Note that we split the (pickle.py) stack into two stacks, an
4224 * object stack and a mark stack. Here we push a mark onto the
4225 * mark stack.
4226 */
4227
4228 if ((self->num_marks + 1) >= self->marks_size) {
4229 size_t alloc;
4230 int *marks;
4231
4232 /* Use the size_t type to check for overflow. */
4233 alloc = ((size_t)self->num_marks << 1) + 20;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00004234 if (alloc > PY_SSIZE_T_MAX ||
4235 alloc <= ((size_t)self->num_marks + 1)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004236 PyErr_NoMemory();
4237 return -1;
4238 }
4239
4240 if (self->marks == NULL)
4241 marks = (int *)PyMem_Malloc(alloc * sizeof(int));
4242 else
4243 marks = (int *)PyMem_Realloc(self->marks, alloc * sizeof(int));
4244 if (marks == NULL) {
4245 PyErr_NoMemory();
4246 return -1;
4247 }
4248 self->marks = marks;
4249 self->marks_size = (Py_ssize_t)alloc;
4250 }
4251
4252 self->marks[self->num_marks++] = self->stack->length;
4253
4254 return 0;
4255}
4256
4257static int
4258load_reduce(UnpicklerObject *self)
4259{
4260 PyObject *callable = NULL;
4261 PyObject *argtup = NULL;
4262 PyObject *obj = NULL;
4263
4264 PDATA_POP(self->stack, argtup);
4265 if (argtup == NULL)
4266 return -1;
4267 PDATA_POP(self->stack, callable);
4268 if (callable) {
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004269 obj = PyObject_CallObject(callable, argtup);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004270 Py_DECREF(callable);
4271 }
4272 Py_DECREF(argtup);
4273
4274 if (obj == NULL)
4275 return -1;
4276
4277 PDATA_PUSH(self->stack, obj, -1);
4278 return 0;
4279}
4280
4281/* Just raises an error if we don't know the protocol specified. PROTO
4282 * is the first opcode for protocols >= 2.
4283 */
4284static int
4285load_proto(UnpicklerObject *self)
4286{
4287 char *s;
4288 int i;
4289
4290 if (unpickler_read(self, &s, 1) < 0)
4291 return -1;
4292
4293 i = (unsigned char)s[0];
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004294 if (i <= HIGHEST_PROTOCOL) {
4295 self->proto = i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004296 return 0;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004297 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004298
4299 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
4300 return -1;
4301}
4302
4303static PyObject *
4304load(UnpicklerObject *self)
4305{
4306 PyObject *err;
4307 PyObject *value = NULL;
4308 char *s;
4309
4310 self->num_marks = 0;
4311 if (self->stack->length)
4312 Pdata_clear(self->stack, 0);
4313
4314 /* Convenient macros for the dispatch while-switch loop just below. */
4315#define OP(opcode, load_func) \
4316 case opcode: if (load_func(self) < 0) break; continue;
4317
4318#define OP_ARG(opcode, load_func, arg) \
4319 case opcode: if (load_func(self, (arg)) < 0) break; continue;
4320
4321 while (1) {
4322 if (unpickler_read(self, &s, 1) < 0)
4323 break;
4324
4325 switch ((enum opcode)s[0]) {
4326 OP(NONE, load_none)
4327 OP(BININT, load_binint)
4328 OP(BININT1, load_binint1)
4329 OP(BININT2, load_binint2)
4330 OP(INT, load_int)
4331 OP(LONG, load_long)
4332 OP_ARG(LONG1, load_counted_long, 1)
4333 OP_ARG(LONG4, load_counted_long, 4)
4334 OP(FLOAT, load_float)
4335 OP(BINFLOAT, load_binfloat)
4336 OP(BINBYTES, load_binbytes)
4337 OP(SHORT_BINBYTES, load_short_binbytes)
4338 OP(BINSTRING, load_binstring)
4339 OP(SHORT_BINSTRING, load_short_binstring)
4340 OP(STRING, load_string)
4341 OP(UNICODE, load_unicode)
4342 OP(BINUNICODE, load_binunicode)
4343 OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
4344 OP_ARG(TUPLE1, load_counted_tuple, 1)
4345 OP_ARG(TUPLE2, load_counted_tuple, 2)
4346 OP_ARG(TUPLE3, load_counted_tuple, 3)
4347 OP(TUPLE, load_tuple)
4348 OP(EMPTY_LIST, load_empty_list)
4349 OP(LIST, load_list)
4350 OP(EMPTY_DICT, load_empty_dict)
4351 OP(DICT, load_dict)
4352 OP(OBJ, load_obj)
4353 OP(INST, load_inst)
4354 OP(NEWOBJ, load_newobj)
4355 OP(GLOBAL, load_global)
4356 OP(APPEND, load_append)
4357 OP(APPENDS, load_appends)
4358 OP(BUILD, load_build)
4359 OP(DUP, load_dup)
4360 OP(BINGET, load_binget)
4361 OP(LONG_BINGET, load_long_binget)
4362 OP(GET, load_get)
4363 OP(MARK, load_mark)
4364 OP(BINPUT, load_binput)
4365 OP(LONG_BINPUT, load_long_binput)
4366 OP(PUT, load_put)
4367 OP(POP, load_pop)
4368 OP(POP_MARK, load_pop_mark)
4369 OP(SETITEM, load_setitem)
4370 OP(SETITEMS, load_setitems)
4371 OP(PERSID, load_persid)
4372 OP(BINPERSID, load_binpersid)
4373 OP(REDUCE, load_reduce)
4374 OP(PROTO, load_proto)
4375 OP_ARG(EXT1, load_extension, 1)
4376 OP_ARG(EXT2, load_extension, 2)
4377 OP_ARG(EXT4, load_extension, 4)
4378 OP_ARG(NEWTRUE, load_bool, Py_True)
4379 OP_ARG(NEWFALSE, load_bool, Py_False)
4380
4381 case STOP:
4382 break;
4383
4384 case '\0':
4385 PyErr_SetNone(PyExc_EOFError);
4386 return NULL;
4387
4388 default:
4389 PyErr_Format(UnpicklingError,
4390 "invalid load key, '%c'.", s[0]);
4391 return NULL;
4392 }
4393
4394 break; /* and we are done! */
4395 }
4396
4397 /* XXX: It is not clear what this is actually for. */
4398 if ((err = PyErr_Occurred())) {
4399 if (err == PyExc_EOFError) {
4400 PyErr_SetNone(PyExc_EOFError);
4401 }
4402 return NULL;
4403 }
4404
4405 PDATA_POP(self->stack, value);
4406 return value;
4407}
4408
4409PyDoc_STRVAR(Unpickler_load_doc,
4410"load() -> object. Load a pickle."
4411"\n"
4412"Read a pickled object representation from the open file object given in\n"
4413"the constructor, and return the reconstituted object hierarchy specified\n"
4414"therein.\n");
4415
4416static PyObject *
4417Unpickler_load(UnpicklerObject *self)
4418{
4419 /* Check whether the Unpickler was initialized correctly. This prevents
4420 segfaulting if a subclass overridden __init__ with a function that does
4421 not call Unpickler.__init__(). Here, we simply ensure that self->read
4422 is not NULL. */
4423 if (self->read == NULL) {
4424 PyErr_Format(UnpicklingError,
4425 "Unpickler.__init__() was not called by %s.__init__()",
4426 Py_TYPE(self)->tp_name);
4427 return NULL;
4428 }
4429
4430 return load(self);
4431}
4432
4433/* The name of find_class() is misleading. In newer pickle protocols, this
4434 function is used for loading any global (i.e., functions), not just
4435 classes. The name is kept only for backward compatibility. */
4436
4437PyDoc_STRVAR(Unpickler_find_class_doc,
4438"find_class(module_name, global_name) -> object.\n"
4439"\n"
4440"Return an object from a specified module, importing the module if\n"
4441"necessary. Subclasses may override this method (e.g. to restrict\n"
4442"unpickling of arbitrary classes and functions).\n"
4443"\n"
4444"This method is called whenever a class or a function object is\n"
4445"needed. Both arguments passed are str objects.\n");
4446
4447static PyObject *
4448Unpickler_find_class(UnpicklerObject *self, PyObject *args)
4449{
4450 PyObject *global;
4451 PyObject *modules_dict;
4452 PyObject *module;
4453 PyObject *module_name, *global_name;
4454
4455 if (!PyArg_UnpackTuple(args, "find_class", 2, 2,
4456 &module_name, &global_name))
4457 return NULL;
4458
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004459 /* Try to map the old names used in Python 2.x to the new ones used in
4460 Python 3.x. We do this only with old pickle protocols and when the
4461 user has not disabled the feature. */
4462 if (self->proto < 3 && self->fix_imports) {
4463 PyObject *key;
4464 PyObject *item;
4465
4466 /* Check if the global (i.e., a function or a class) was renamed
4467 or moved to another module. */
4468 key = PyTuple_Pack(2, module_name, global_name);
4469 if (key == NULL)
4470 return NULL;
4471 item = PyDict_GetItemWithError(name_mapping_2to3, key);
4472 Py_DECREF(key);
4473 if (item) {
4474 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
4475 PyErr_Format(PyExc_RuntimeError,
4476 "_compat_pickle.NAME_MAPPING values should be "
4477 "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
4478 return NULL;
4479 }
4480 module_name = PyTuple_GET_ITEM(item, 0);
4481 global_name = PyTuple_GET_ITEM(item, 1);
4482 if (!PyUnicode_Check(module_name) ||
4483 !PyUnicode_Check(global_name)) {
4484 PyErr_Format(PyExc_RuntimeError,
4485 "_compat_pickle.NAME_MAPPING values should be "
4486 "pairs of str, not (%.200s, %.200s)",
4487 Py_TYPE(module_name)->tp_name,
4488 Py_TYPE(global_name)->tp_name);
4489 return NULL;
4490 }
4491 }
4492 else if (PyErr_Occurred()) {
4493 return NULL;
4494 }
4495
4496 /* Check if the module was renamed. */
4497 item = PyDict_GetItemWithError(import_mapping_2to3, module_name);
4498 if (item) {
4499 if (!PyUnicode_Check(item)) {
4500 PyErr_Format(PyExc_RuntimeError,
4501 "_compat_pickle.IMPORT_MAPPING values should be "
4502 "strings, not %.200s", Py_TYPE(item)->tp_name);
4503 return NULL;
4504 }
4505 module_name = item;
4506 }
4507 else if (PyErr_Occurred()) {
4508 return NULL;
4509 }
4510 }
4511
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004512 modules_dict = PySys_GetObject("modules");
4513 if (modules_dict == NULL)
4514 return NULL;
4515
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004516 module = PyDict_GetItemWithError(modules_dict, module_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004517 if (module == NULL) {
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004518 if (PyErr_Occurred())
4519 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004520 module = PyImport_Import(module_name);
4521 if (module == NULL)
4522 return NULL;
4523 global = PyObject_GetAttr(module, global_name);
4524 Py_DECREF(module);
4525 }
4526 else {
4527 global = PyObject_GetAttr(module, global_name);
4528 }
4529 return global;
4530}
4531
4532static struct PyMethodDef Unpickler_methods[] = {
4533 {"load", (PyCFunction)Unpickler_load, METH_NOARGS,
4534 Unpickler_load_doc},
4535 {"find_class", (PyCFunction)Unpickler_find_class, METH_VARARGS,
4536 Unpickler_find_class_doc},
4537 {NULL, NULL} /* sentinel */
4538};
4539
4540static void
4541Unpickler_dealloc(UnpicklerObject *self)
4542{
4543 PyObject_GC_UnTrack((PyObject *)self);
4544 Py_XDECREF(self->readline);
4545 Py_XDECREF(self->read);
4546 Py_XDECREF(self->memo);
4547 Py_XDECREF(self->stack);
4548 Py_XDECREF(self->pers_func);
4549 Py_XDECREF(self->arg);
4550 Py_XDECREF(self->last_string);
4551
4552 PyMem_Free(self->marks);
4553 free(self->encoding);
4554 free(self->errors);
4555
4556 Py_TYPE(self)->tp_free((PyObject *)self);
4557}
4558
4559static int
4560Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
4561{
4562 Py_VISIT(self->readline);
4563 Py_VISIT(self->read);
4564 Py_VISIT(self->memo);
4565 Py_VISIT(self->stack);
4566 Py_VISIT(self->pers_func);
4567 Py_VISIT(self->arg);
4568 Py_VISIT(self->last_string);
4569 return 0;
4570}
4571
4572static int
4573Unpickler_clear(UnpicklerObject *self)
4574{
4575 Py_CLEAR(self->readline);
4576 Py_CLEAR(self->read);
4577 Py_CLEAR(self->memo);
4578 Py_CLEAR(self->stack);
4579 Py_CLEAR(self->pers_func);
4580 Py_CLEAR(self->arg);
4581 Py_CLEAR(self->last_string);
4582
4583 PyMem_Free(self->marks);
4584 self->marks = NULL;
4585 free(self->encoding);
4586 self->encoding = NULL;
4587 free(self->errors);
4588 self->errors = NULL;
4589
4590 return 0;
4591}
4592
4593PyDoc_STRVAR(Unpickler_doc,
4594"Unpickler(file, *, encoding='ASCII', errors='strict')"
4595"\n"
4596"This takes a binary file for reading a pickle data stream.\n"
4597"\n"
4598"The protocol version of the pickle is detected automatically, so no\n"
4599"proto argument is needed.\n"
4600"\n"
4601"The file-like object must have two methods, a read() method\n"
4602"that takes an integer argument, and a readline() method that\n"
4603"requires no arguments. Both methods should return bytes.\n"
4604"Thus file-like object can be a binary file object opened for\n"
4605"reading, a BytesIO object, or any other custom object that\n"
4606"meets this interface.\n"
4607"\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004608"Optional keyword arguments are *fix_imports*, *encoding* and *errors*,\n"
4609"which are used to control compatiblity support for pickle stream\n"
4610"generated by Python 2.x. If *fix_imports* is True, pickle will try to\n"
4611"map the old Python 2.x names to the new names used in Python 3.x. The\n"
4612"*encoding* and *errors* tell pickle how to decode 8-bit string\n"
4613"instances pickled by Python 2.x; these default to 'ASCII' and\n"
4614"'strict', respectively.\n");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004615
4616static int
4617Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
4618{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004619 static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004620 PyObject *file;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004621 int fix_imports = 1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004622 char *encoding = NULL;
4623 char *errors = NULL;
4624
4625 /* XXX: That is an horrible error message. But, I don't know how to do
4626 better... */
4627 if (Py_SIZE(args) != 1) {
4628 PyErr_Format(PyExc_TypeError,
4629 "%s takes exactly one positional argument (%zd given)",
4630 Py_TYPE(self)->tp_name, Py_SIZE(args));
4631 return -1;
4632 }
4633
4634 /* Arguments parsing needs to be done in the __init__() method to allow
4635 subclasses to define their own __init__() method, which may (or may
4636 not) support Unpickler arguments. However, this means we need to be
4637 extra careful in the other Unpickler methods, since a subclass could
4638 forget to call Unpickler.__init__() thus breaking our internal
4639 invariants. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004640 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|iss:Unpickler", kwlist,
4641 &file, &fix_imports, &encoding, &errors))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004642 return -1;
4643
4644 /* In case of multiple __init__() calls, clear previous content. */
4645 if (self->read != NULL)
4646 (void)Unpickler_clear(self);
4647
4648 self->read = PyObject_GetAttrString(file, "read");
4649 self->readline = PyObject_GetAttrString(file, "readline");
4650 if (self->readline == NULL || self->read == NULL)
4651 return -1;
4652
4653 if (encoding == NULL)
4654 encoding = "ASCII";
4655 if (errors == NULL)
4656 errors = "strict";
4657
4658 self->encoding = strdup(encoding);
4659 self->errors = strdup(errors);
4660 if (self->encoding == NULL || self->errors == NULL) {
4661 PyErr_NoMemory();
4662 return -1;
4663 }
4664
4665 if (PyObject_HasAttrString((PyObject *)self, "persistent_load")) {
4666 self->pers_func = PyObject_GetAttrString((PyObject *)self,
4667 "persistent_load");
4668 if (self->pers_func == NULL)
4669 return -1;
4670 }
4671 else {
4672 self->pers_func = NULL;
4673 }
4674
4675 self->stack = (Pdata *)Pdata_New();
4676 if (self->stack == NULL)
4677 return -1;
4678
4679 self->memo = PyDict_New();
4680 if (self->memo == NULL)
4681 return -1;
4682
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00004683 self->last_string = NULL;
4684 self->arg = NULL;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004685 self->proto = 0;
4686 self->fix_imports = fix_imports;
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00004687
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004688 return 0;
4689}
4690
4691static PyObject *
4692Unpickler_get_memo(UnpicklerObject *self)
4693{
4694 if (self->memo == NULL)
4695 PyErr_SetString(PyExc_AttributeError, "memo");
4696 else
4697 Py_INCREF(self->memo);
4698 return self->memo;
4699}
4700
4701static int
4702Unpickler_set_memo(UnpicklerObject *self, PyObject *value)
4703{
4704 PyObject *tmp;
4705
4706 if (value == NULL) {
4707 PyErr_SetString(PyExc_TypeError,
4708 "attribute deletion is not supported");
4709 return -1;
4710 }
4711 if (!PyDict_Check(value)) {
4712 PyErr_SetString(PyExc_TypeError, "memo must be a dictionary");
4713 return -1;
4714 }
4715
4716 tmp = self->memo;
4717 Py_INCREF(value);
4718 self->memo = value;
4719 Py_XDECREF(tmp);
4720
4721 return 0;
4722}
4723
4724static PyObject *
4725Unpickler_get_persload(UnpicklerObject *self)
4726{
4727 if (self->pers_func == NULL)
4728 PyErr_SetString(PyExc_AttributeError, "persistent_load");
4729 else
4730 Py_INCREF(self->pers_func);
4731 return self->pers_func;
4732}
4733
4734static int
4735Unpickler_set_persload(UnpicklerObject *self, PyObject *value)
4736{
4737 PyObject *tmp;
4738
4739 if (value == NULL) {
4740 PyErr_SetString(PyExc_TypeError,
4741 "attribute deletion is not supported");
4742 return -1;
4743 }
4744 if (!PyCallable_Check(value)) {
4745 PyErr_SetString(PyExc_TypeError,
4746 "persistent_load must be a callable taking "
4747 "one argument");
4748 return -1;
4749 }
4750
4751 tmp = self->pers_func;
4752 Py_INCREF(value);
4753 self->pers_func = value;
4754 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
4755
4756 return 0;
4757}
4758
4759static PyGetSetDef Unpickler_getsets[] = {
4760 {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
4761 {"persistent_load", (getter)Unpickler_get_persload,
4762 (setter)Unpickler_set_persload},
4763 {NULL}
4764};
4765
4766static PyTypeObject Unpickler_Type = {
4767 PyVarObject_HEAD_INIT(NULL, 0)
4768 "_pickle.Unpickler", /*tp_name*/
4769 sizeof(UnpicklerObject), /*tp_basicsize*/
4770 0, /*tp_itemsize*/
4771 (destructor)Unpickler_dealloc, /*tp_dealloc*/
4772 0, /*tp_print*/
4773 0, /*tp_getattr*/
4774 0, /*tp_setattr*/
Mark Dickinsone94c6792009-02-02 20:36:42 +00004775 0, /*tp_reserved*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004776 0, /*tp_repr*/
4777 0, /*tp_as_number*/
4778 0, /*tp_as_sequence*/
4779 0, /*tp_as_mapping*/
4780 0, /*tp_hash*/
4781 0, /*tp_call*/
4782 0, /*tp_str*/
4783 0, /*tp_getattro*/
4784 0, /*tp_setattro*/
4785 0, /*tp_as_buffer*/
4786 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4787 Unpickler_doc, /*tp_doc*/
4788 (traverseproc)Unpickler_traverse, /*tp_traverse*/
4789 (inquiry)Unpickler_clear, /*tp_clear*/
4790 0, /*tp_richcompare*/
4791 0, /*tp_weaklistoffset*/
4792 0, /*tp_iter*/
4793 0, /*tp_iternext*/
4794 Unpickler_methods, /*tp_methods*/
4795 0, /*tp_members*/
4796 Unpickler_getsets, /*tp_getset*/
4797 0, /*tp_base*/
4798 0, /*tp_dict*/
4799 0, /*tp_descr_get*/
4800 0, /*tp_descr_set*/
4801 0, /*tp_dictoffset*/
4802 (initproc)Unpickler_init, /*tp_init*/
4803 PyType_GenericAlloc, /*tp_alloc*/
4804 PyType_GenericNew, /*tp_new*/
4805 PyObject_GC_Del, /*tp_free*/
4806 0, /*tp_is_gc*/
4807};
4808
4809static int
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004810initmodule(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004811{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004812 PyObject *copyreg = NULL;
4813 PyObject *compat_pickle = NULL;
4814
4815 /* XXX: We should ensure that the types of the dictionaries imported are
4816 exactly PyDict objects. Otherwise, it is possible to crash the pickle
4817 since we use the PyDict API directly to access these dictionaries. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004818
4819 copyreg = PyImport_ImportModule("copyreg");
4820 if (!copyreg)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004821 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004822 dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
4823 if (!dispatch_table)
4824 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004825 extension_registry = \
4826 PyObject_GetAttrString(copyreg, "_extension_registry");
4827 if (!extension_registry)
4828 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004829 inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry");
4830 if (!inverted_registry)
4831 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004832 extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
4833 if (!extension_cache)
4834 goto error;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004835 Py_CLEAR(copyreg);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004836
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004837 /* Load the 2.x -> 3.x stdlib module mapping tables */
4838 compat_pickle = PyImport_ImportModule("_compat_pickle");
4839 if (!compat_pickle)
4840 goto error;
4841 name_mapping_2to3 = PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
4842 if (!name_mapping_2to3)
4843 goto error;
4844 if (!PyDict_CheckExact(name_mapping_2to3)) {
4845 PyErr_Format(PyExc_RuntimeError,
4846 "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
4847 Py_TYPE(name_mapping_2to3)->tp_name);
4848 goto error;
4849 }
4850 import_mapping_2to3 = PyObject_GetAttrString(compat_pickle,
4851 "IMPORT_MAPPING");
4852 if (!import_mapping_2to3)
4853 goto error;
4854 if (!PyDict_CheckExact(import_mapping_2to3)) {
4855 PyErr_Format(PyExc_RuntimeError,
4856 "_compat_pickle.IMPORT_MAPPING should be a dict, "
4857 "not %.200s", Py_TYPE(import_mapping_2to3)->tp_name);
4858 goto error;
4859 }
4860 /* ... and the 3.x -> 2.x mapping tables */
4861 name_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
4862 "REVERSE_NAME_MAPPING");
4863 if (!name_mapping_3to2)
4864 goto error;
4865 if (!PyDict_CheckExact(name_mapping_3to2)) {
4866 PyErr_Format(PyExc_RuntimeError,
4867 "_compat_pickle.REVERSE_NAME_MAPPING shouldbe a dict, "
4868 "not %.200s", Py_TYPE(name_mapping_3to2)->tp_name);
4869 goto error;
4870 }
4871 import_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
4872 "REVERSE_IMPORT_MAPPING");
4873 if (!import_mapping_3to2)
4874 goto error;
4875 if (!PyDict_CheckExact(import_mapping_3to2)) {
4876 PyErr_Format(PyExc_RuntimeError,
4877 "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
4878 "not %.200s", Py_TYPE(import_mapping_3to2)->tp_name);
4879 goto error;
4880 }
4881 Py_CLEAR(compat_pickle);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004882
4883 empty_tuple = PyTuple_New(0);
4884 if (empty_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004885 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004886 two_tuple = PyTuple_New(2);
4887 if (two_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004888 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004889 /* We use this temp container with no regard to refcounts, or to
4890 * keeping containees alive. Exempt from GC, because we don't
4891 * want anything looking at two_tuple() by magic.
4892 */
4893 PyObject_GC_UnTrack(two_tuple);
4894
4895 return 0;
4896
4897 error:
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004898 Py_CLEAR(copyreg);
4899 Py_CLEAR(dispatch_table);
4900 Py_CLEAR(extension_registry);
4901 Py_CLEAR(inverted_registry);
4902 Py_CLEAR(extension_cache);
4903 Py_CLEAR(compat_pickle);
4904 Py_CLEAR(name_mapping_2to3);
4905 Py_CLEAR(import_mapping_2to3);
4906 Py_CLEAR(name_mapping_3to2);
4907 Py_CLEAR(import_mapping_3to2);
4908 Py_CLEAR(empty_tuple);
4909 Py_CLEAR(two_tuple);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004910 return -1;
4911}
4912
4913static struct PyModuleDef _picklemodule = {
4914 PyModuleDef_HEAD_INIT,
4915 "_pickle",
4916 pickle_module_doc,
4917 -1,
4918 NULL,
4919 NULL,
4920 NULL,
4921 NULL,
4922 NULL
4923};
4924
4925PyMODINIT_FUNC
4926PyInit__pickle(void)
4927{
4928 PyObject *m;
4929
4930 if (PyType_Ready(&Unpickler_Type) < 0)
4931 return NULL;
4932 if (PyType_Ready(&Pickler_Type) < 0)
4933 return NULL;
4934 if (PyType_Ready(&Pdata_Type) < 0)
4935 return NULL;
4936
4937 /* Create the module and add the functions. */
4938 m = PyModule_Create(&_picklemodule);
4939 if (m == NULL)
4940 return NULL;
4941
4942 if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
4943 return NULL;
4944 if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
4945 return NULL;
4946
4947 /* Initialize the exceptions. */
4948 PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
4949 if (PickleError == NULL)
4950 return NULL;
4951 PicklingError = \
4952 PyErr_NewException("_pickle.PicklingError", PickleError, NULL);
4953 if (PicklingError == NULL)
4954 return NULL;
4955 UnpicklingError = \
4956 PyErr_NewException("_pickle.UnpicklingError", PickleError, NULL);
4957 if (UnpicklingError == NULL)
4958 return NULL;
4959
4960 if (PyModule_AddObject(m, "PickleError", PickleError) < 0)
4961 return NULL;
4962 if (PyModule_AddObject(m, "PicklingError", PicklingError) < 0)
4963 return NULL;
4964 if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0)
4965 return NULL;
4966
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004967 if (initmodule() < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004968 return NULL;
4969
4970 return m;
4971}