blob: 754d13275cc17acff8347aca739338a5c32819c2 [file] [log] [blame]
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001#include "Python.h"
2#include "structmember.h"
3
4PyDoc_STRVAR(pickle_module_doc,
5"Optimized C implementation for the Python pickle module.");
6
/* Pickle protocol numbers used by this module.  Bump these when new opcodes
   are added to the pickle protocol. */
enum {
    HIGHEST_PROTOCOL = 3,
    DEFAULT_PROTOCOL = 3
};
12
13
/* Pickle opcodes.  Every enumerator is the one-character code that stands
   for the opcode in a pickle stream.  This list must be kept in sync with
   pickle.py; pickletools.py carries the extensive documentation. */
enum opcode {
    MARK            = '(',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',
    FLOAT           = 'F',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    LONG            = 'L',
    BININT2         = 'M',
    NONE            = 'N',
    PERSID          = 'P',
    BINPERSID       = 'Q',
    REDUCE          = 'R',
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',
    APPEND          = 'a',
    BUILD           = 'b',
    GLOBAL          = 'c',
    DICT            = 'd',
    EMPTY_DICT      = '}',
    APPENDS         = 'e',
    GET             = 'g',
    BINGET          = 'h',
    INST            = 'i',
    LONG_BINGET     = 'j',
    LIST            = 'l',
    EMPTY_LIST      = ']',
    OBJ             = 'o',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',
    SETITEM         = 's',
    TUPLE           = 't',
    EMPTY_TUPLE     = ')',
    SETITEMS        = 'u',
    BINFLOAT        = 'G',

    /* Opcodes added by protocol 2. */
    PROTO           = '\x80',
    NEWOBJ          = '\x81',
    EXT1            = '\x82',
    EXT2            = '\x83',
    EXT4            = '\x84',
    TUPLE1          = '\x85',
    TUPLE2          = '\x86',
    TUPLE3          = '\x87',
    NEWTRUE         = '\x88',
    NEWFALSE        = '\x89',
    LONG1           = '\x8a',
    LONG4           = '\x8b',

    /* Opcodes added by protocol 3 (Python 3.x). */
    BINBYTES        = 'B',
    SHORT_BINBYTES  = 'C',
};
77
/* Not opcodes: these are the byte sequences used to pickle bools before
 * protocol 2.  Unpicklers written before bools were introduced read them
 * as ints, while newer unpicklers can recognize that bools were intended.
 * Protocol 2 added direct ways to pickle bools.
 */
#ifdef TRUE
#undef TRUE
#endif
#ifdef FALSE
#undef FALSE
#endif
#define TRUE  "I01\n"
#define FALSE "I00\n"
87
enum {
    /* Number of elements batch_list/dict() emits before issuing an
       APPENDS/SETITEMS.  Keep in sync with pickle.Pickler._BATCHSIZE;
       nothing breaks if the two drift apart, but it is unclear that it
       would help anything either. */
    BATCHSIZE = 1000,

    /* Nesting depth at which a Pickler running in "fast mode" starts
       checking for self-referential data structures. */
    FAST_NESTING_LIMIT = 50,

    /* Size of the Pickler write buffer.  Larger values reduce the number
       of calls to the write() method of the output stream. */
    WRITE_BUF_SIZE = 256,
};
103
104/* Exception classes for pickle. These should override the ones defined in
105 pickle.py, when the C-optimized Pickler and Unpickler are used. */
106static PyObject *PickleError;
107static PyObject *PicklingError;
108static PyObject *UnpicklingError;
109
110/* copyreg.dispatch_table, {type_object: pickling_function} */
111static PyObject *dispatch_table;
112/* For EXT[124] opcodes. */
113/* copyreg._extension_registry, {(module_name, function_name): code} */
114static PyObject *extension_registry;
115/* copyreg._inverted_registry, {code: (module_name, function_name)} */
116static PyObject *inverted_registry;
117/* copyreg._extension_cache, {code: object} */
118static PyObject *extension_cache;
119
120/* XXX: Are these really nescessary? */
121/* As the name says, an empty tuple. */
122static PyObject *empty_tuple;
123/* For looking up name pairs in copyreg._extension_registry. */
124static PyObject *two_tuple;
125
126static int
127stack_underflow(void)
128{
129 PyErr_SetString(UnpicklingError, "unpickling stack underflow");
130 return -1;
131}
132
133/* Internal data type used as the unpickling stack. */
134typedef struct {
135 PyObject_HEAD
136 int length; /* number of initial slots in data currently used */
137 int size; /* number of slots in data allocated */
138 PyObject **data;
139} Pdata;
140
141static void
142Pdata_dealloc(Pdata *self)
143{
144 int i;
145 PyObject **p;
146
147 for (i = self->length, p = self->data; --i >= 0; p++) {
148 Py_DECREF(*p);
149 }
150 if (self->data)
151 PyMem_Free(self->data);
152 PyObject_Del(self);
153}
154
155static PyTypeObject Pdata_Type = {
156 PyVarObject_HEAD_INIT(NULL, 0)
157 "_pickle.Pdata", /*tp_name*/
158 sizeof(Pdata), /*tp_basicsize*/
159 0, /*tp_itemsize*/
160 (destructor)Pdata_dealloc, /*tp_dealloc*/
161};
162
163static PyObject *
164Pdata_New(void)
165{
166 Pdata *self;
167
168 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
169 return NULL;
170 self->size = 8;
171 self->length = 0;
172 self->data = PyMem_Malloc(self->size * sizeof(PyObject *));
173 if (self->data)
174 return (PyObject *)self;
175 Py_DECREF(self);
176 return PyErr_NoMemory();
177}
178
179
180/* Retain only the initial clearto items. If clearto >= the current
181 * number of items, this is a (non-erroneous) NOP.
182 */
183static int
184Pdata_clear(Pdata *self, int clearto)
185{
186 int i;
187 PyObject **p;
188
189 if (clearto < 0)
190 return stack_underflow();
191 if (clearto >= self->length)
192 return 0;
193
194 for (i = self->length, p = self->data + clearto; --i >= clearto; p++) {
195 Py_CLEAR(*p);
196 }
197 self->length = clearto;
198
199 return 0;
200}
201
202static int
203Pdata_grow(Pdata *self)
204{
205 int bigger;
206 size_t nbytes;
207 PyObject **tmp;
208
209 bigger = (self->size << 1) + 1;
210 if (bigger <= 0) /* was 0, or new value overflows */
211 goto nomemory;
212 if ((int)(size_t)bigger != bigger)
213 goto nomemory;
214 nbytes = (size_t)bigger * sizeof(PyObject *);
215 if (nbytes / sizeof(PyObject *) != (size_t)bigger)
216 goto nomemory;
217 tmp = PyMem_Realloc(self->data, nbytes);
218 if (tmp == NULL)
219 goto nomemory;
220 self->data = tmp;
221 self->size = bigger;
222 return 0;
223
224 nomemory:
225 PyErr_NoMemory();
226 return -1;
227}
228
229/* D is a Pdata*. Pop the topmost element and store it into V, which
230 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
231 * is raised and V is set to NULL.
232 */
233static PyObject *
234Pdata_pop(Pdata *self)
235{
236 if (self->length == 0) {
237 PyErr_SetString(UnpicklingError, "bad pickle data");
238 return NULL;
239 }
240 return self->data[--(self->length)];
241}
242#define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
243
244static int
245Pdata_push(Pdata *self, PyObject *obj)
246{
247 if (self->length == self->size && Pdata_grow(self) < 0) {
248 return -1;
249 }
250 self->data[self->length++] = obj;
251 return 0;
252}
253
/* Push an object on stack, transferring its ownership to the stack.  O is
   evaluated exactly once.  If the push fails, the enclosing function
   returns ER immediately, so the caller gets no chance to release the
   reference it was handing over; the macro therefore releases it itself
   instead of leaking it. */
#define PDATA_PUSH(D, O, ER) do {                               \
        PyObject *pdata_push_obj_ = (O);                        \
        if (Pdata_push((D), pdata_push_obj_) < 0) {             \
            Py_XDECREF(pdata_push_obj_);                        \
            return (ER);                                        \
        }                                                       \
    } while(0)

/* Push an object on stack, adding a new reference to the object.  O is
   evaluated exactly once.  If the push fails, the reference added here is
   dropped again before the enclosing function returns ER, so the caller's
   own reference is left untouched. */
#define PDATA_APPEND(D, O, ER) do {                             \
        PyObject *pdata_append_obj_ = (O);                      \
        Py_INCREF(pdata_append_obj_);                           \
        if (Pdata_push((D), pdata_append_obj_) < 0) {           \
            Py_DECREF(pdata_append_obj_);                       \
            return (ER);                                        \
        }                                                       \
    } while(0)
262
263static PyObject *
264Pdata_poptuple(Pdata *self, Py_ssize_t start)
265{
266 PyObject *tuple;
267 Py_ssize_t len, i, j;
268
269 len = self->length - start;
270 tuple = PyTuple_New(len);
271 if (tuple == NULL)
272 return NULL;
273 for (i = start, j = 0; j < len; i++, j++)
274 PyTuple_SET_ITEM(tuple, j, self->data[i]);
275
276 self->length = start;
277 return tuple;
278}
279
280static PyObject *
281Pdata_poplist(Pdata *self, Py_ssize_t start)
282{
283 PyObject *list;
284 Py_ssize_t len, i, j;
285
286 len = self->length - start;
287 list = PyList_New(len);
288 if (list == NULL)
289 return NULL;
290 for (i = start, j = 0; j < len; i++, j++)
291 PyList_SET_ITEM(list, j, self->data[i]);
292
293 self->length = start;
294 return list;
295}
296
297typedef struct PicklerObject {
298 PyObject_HEAD
299 PyObject *write; /* write() method of the output stream */
300 PyObject *memo; /* Memo dictionary, keep track of the seen
301 objects to support self-referential objects
302 pickling. */
303 PyObject *pers_func; /* persistent_id() method, can be NULL */
304 PyObject *arg;
305 int proto; /* Pickle protocol number, >= 0 */
306 int bin; /* Boolean, true if proto > 0 */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000307 int buf_size; /* Size of the current buffered pickle data */
308 char *write_buf; /* Write buffer, this is to avoid calling the
309 write() method of the output stream too
310 often. */
311 int fast; /* Enable fast mode if set to a true value.
312 The fast mode disable the usage of memo,
313 therefore speeding the pickling process by
314 not generating superfluous PUT opcodes. It
315 should not be used if with self-referential
316 objects. */
317 int fast_nesting;
318 PyObject *fast_memo;
319} PicklerObject;
320
321typedef struct UnpicklerObject {
322 PyObject_HEAD
323 Pdata *stack; /* Pickle data stack, store unpickled objects. */
324 PyObject *readline; /* readline() method of the output stream */
325 PyObject *read; /* read() method of the output stream */
326 PyObject *memo; /* Memo dictionary, provide the objects stored
327 using the PUT opcodes. */
328 PyObject *arg;
329 PyObject *pers_func; /* persistent_load() method, can be NULL. */
330 PyObject *last_string; /* Reference to the last string read by the
331 readline() method. */
332 char *buffer; /* Reading buffer. */
333 char *encoding; /* Name of the encoding to be used for
334 decoding strings pickled using Python
335 2.x. The default value is "ASCII" */
336 char *errors; /* Name of errors handling scheme to used when
337 decoding strings. The default value is
338 "strict". */
339 int *marks; /* Mark stack, used for unpickling container
340 objects. */
341 Py_ssize_t num_marks; /* Number of marks in the mark stack. */
342 Py_ssize_t marks_size; /* Current allocated size of the mark stack. */
343} UnpicklerObject;
344
345/* Forward declarations */
346static int save(PicklerObject *, PyObject *, int);
347static int save_reduce(PicklerObject *, PyObject *, PyObject *);
348static PyTypeObject Pickler_Type;
349static PyTypeObject Unpickler_Type;
350
351
/* Helpers managing the one-element argument tuple cached in (self)->arg, so
   that PyTuple_New() is normally called only once.

   ARG_TUP(self, obj) creates the tuple on first use, releases whatever item
   it currently holds and stores obj in it without adding a reference.  If
   the tuple cannot be created, the reference to obj is dropped instead and
   (self)->arg is left NULL. */

#define ARG_TUP(self, obj) do {                                 \
        if ((self)->arg == NULL)                                \
            (self)->arg = PyTuple_New(1);                       \
        if ((self)->arg != NULL) {                              \
            Py_XDECREF(PyTuple_GET_ITEM((self)->arg, 0));       \
            PyTuple_SET_ITEM((self)->arg, 0, (obj));            \
        }                                                       \
        else {                                                  \
            Py_DECREF((obj));                                   \
        }                                                       \
    } while (0)

/* Give up the cached tuple when something else holds a reference to it as
   well (its reference count is greater than one). */
#define FREE_ARG_TUP(self) do {                                 \
        if (Py_REFCNT((self)->arg) > 1)                         \
            Py_CLEAR((self)->arg);                              \
    } while (0)
369
370/* A temporary cleaner API for fast single argument function call.
371
372 XXX: Does caching the argument tuple provides any real performance benefits?
373
374 A quick benchmark, on a 2.0GHz Athlon64 3200+ running Linux 2.6.24 with
375 glibc 2.7, tells me that it takes roughly 20,000,000 PyTuple_New(1) calls
376 when the tuple is retrieved from the freelist (i.e, call PyTuple_New() then
377 immediately DECREF it) and 1,200,000 calls when allocating brand new tuples
378 (i.e, call PyTuple_New() and store the returned value in an array), to save
379 one second (wall clock time). Either ways, the loading time a pickle stream
380 large enough to generate this number of calls would be massively
381 overwhelmed by other factors, like I/O throughput, the GC traversal and
382 object allocation overhead. So, I really doubt these functions provide any
383 real benefits.
384
385 On the other hand, oprofile reports that pickle spends a lot of time in
386 these functions. But, that is probably more related to the function call
387 overhead, than the argument tuple allocation.
388
389 XXX: And, what is the reference behavior of these? Steal, borrow? At first
390 glance, it seems to steal the reference of 'arg' and borrow the reference
391 of 'func'.
392 */
393static PyObject *
394pickler_call(PicklerObject *self, PyObject *func, PyObject *arg)
395{
396 PyObject *result = NULL;
397
398 ARG_TUP(self, arg);
399 if (self->arg) {
400 result = PyObject_Call(func, self->arg, NULL);
401 FREE_ARG_TUP(self);
402 }
403 return result;
404}
405
406static PyObject *
407unpickler_call(UnpicklerObject *self, PyObject *func, PyObject *arg)
408{
409 PyObject *result = NULL;
410
411 ARG_TUP(self, arg);
412 if (self->arg) {
413 result = PyObject_Call(func, self->arg, NULL);
414 FREE_ARG_TUP(self);
415 }
416 return result;
417}
418
419static Py_ssize_t
420pickler_write(PicklerObject *self, const char *s, Py_ssize_t n)
421{
422 PyObject *data, *result;
423
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000424 if (self->write_buf == NULL) {
425 PyErr_SetString(PyExc_SystemError, "invalid write buffer");
426 return -1;
427 }
428
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000429 if (s == NULL) {
430 if (!(self->buf_size))
431 return 0;
432 data = PyBytes_FromStringAndSize(self->write_buf, self->buf_size);
433 if (data == NULL)
434 return -1;
435 }
436 else {
437 if (self->buf_size && (n + self->buf_size) > WRITE_BUF_SIZE) {
438 if (pickler_write(self, NULL, 0) < 0)
439 return -1;
440 }
441
442 if (n > WRITE_BUF_SIZE) {
443 if (!(data = PyBytes_FromStringAndSize(s, n)))
444 return -1;
445 }
446 else {
447 memcpy(self->write_buf + self->buf_size, s, n);
448 self->buf_size += n;
449 return n;
450 }
451 }
452
453 /* object with write method */
454 result = pickler_call(self, self->write, data);
455 if (result == NULL)
456 return -1;
457
458 Py_DECREF(result);
459 self->buf_size = 0;
460 return n;
461}
462
/* XXX: These read/readline functions ought to be optimized. Buffered I/O
   might help a lot, especially with the new (but much slower) io library.
   On the other hand, the added complexity might not be worth it.
 */
467
468/* Read at least n characters from the input stream and set s to the current
469 reading position. */
470static Py_ssize_t
471unpickler_read(UnpicklerObject *self, char **s, Py_ssize_t n)
472{
473 PyObject *len;
474 PyObject *data;
475
476 len = PyLong_FromSsize_t(n);
477 if (len == NULL)
478 return -1;
479
480 data = unpickler_call(self, self->read, len);
481 if (data == NULL)
482 return -1;
483
484 /* XXX: Should bytearray be supported too? */
485 if (!PyBytes_Check(data)) {
486 PyErr_SetString(PyExc_ValueError,
487 "read() from the underlying stream did not"
488 "return bytes");
Amaury Forgeot d'Arc5f952572008-11-25 21:11:54 +0000489 Py_DECREF(data);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000490 return -1;
491 }
492
Amaury Forgeot d'Arc3e4e72f2008-11-11 20:05:06 +0000493 if (PyBytes_GET_SIZE(data) != n) {
494 PyErr_SetNone(PyExc_EOFError);
Amaury Forgeot d'Arc5f952572008-11-25 21:11:54 +0000495 Py_DECREF(data);
Amaury Forgeot d'Arc3e4e72f2008-11-11 20:05:06 +0000496 return -1;
497 }
498
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000499 Py_XDECREF(self->last_string);
500 self->last_string = data;
501
502 if (!(*s = PyBytes_AS_STRING(data)))
503 return -1;
504
505 return n;
506}
507
508static Py_ssize_t
509unpickler_readline(UnpicklerObject *self, char **s)
510{
511 PyObject *data;
512
513 data = PyObject_CallObject(self->readline, empty_tuple);
514 if (data == NULL)
515 return -1;
516
517 /* XXX: Should bytearray be supported too? */
518 if (!PyBytes_Check(data)) {
519 PyErr_SetString(PyExc_ValueError,
520 "readline() from the underlying stream did not"
521 "return bytes");
522 return -1;
523 }
524
525 Py_XDECREF(self->last_string);
526 self->last_string = data;
527
528 if (!(*s = PyBytes_AS_STRING(data)))
529 return -1;
530
531 return PyBytes_GET_SIZE(data);
532}
533
534/* Generate a GET opcode for an object stored in the memo. The 'key' argument
535 should be the address of the object as returned by PyLong_FromVoidPtr(). */
536static int
537memo_get(PicklerObject *self, PyObject *key)
538{
539 PyObject *value;
540 PyObject *memo_id;
541 long x;
542 char pdata[30];
543 int len;
544
545 value = PyDict_GetItemWithError(self->memo, key);
546 if (value == NULL) {
547 if (!PyErr_Occurred())
548 PyErr_SetObject(PyExc_KeyError, key);
549 return -1;
550 }
551
552 memo_id = PyTuple_GetItem(value, 0);
553 if (memo_id == NULL)
554 return -1;
555
556 if (!PyLong_Check(memo_id)) {
557 PyErr_SetString(PicklingError, "memo id must be an integer");
558 return -1;
559 }
560 x = PyLong_AsLong(memo_id);
561 if (x == -1 && PyErr_Occurred())
562 return -1;
563
564 if (!self->bin) {
565 pdata[0] = GET;
566 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
567 len = (int)strlen(pdata);
568 }
569 else {
570 if (x < 256) {
571 pdata[0] = BINGET;
572 pdata[1] = (unsigned char)(x & 0xff);
573 len = 2;
574 }
575 else if (x <= 0xffffffffL) {
576 pdata[0] = LONG_BINGET;
577 pdata[1] = (unsigned char)(x & 0xff);
578 pdata[2] = (unsigned char)((x >> 8) & 0xff);
579 pdata[3] = (unsigned char)((x >> 16) & 0xff);
580 pdata[4] = (unsigned char)((x >> 24) & 0xff);
581 len = 5;
582 }
583 else { /* unlikely */
584 PyErr_SetString(PicklingError,
585 "memo id too large for LONG_BINGET");
586 return -1;
587 }
588 }
589
590 if (pickler_write(self, pdata, len) < 0)
591 return -1;
592
593 return 0;
594}
595
596/* Store an object in the memo, assign it a new unique ID based on the number
597 of objects currently stored in the memo and generate a PUT opcode. */
598static int
599memo_put(PicklerObject *self, PyObject *obj)
600{
601 PyObject *key = NULL;
602 PyObject *memo_id = NULL;
603 PyObject *tuple = NULL;
604 long x;
605 char pdata[30];
606 int len;
607 int status = 0;
608
609 if (self->fast)
610 return 0;
611
612 key = PyLong_FromVoidPtr(obj);
613 if (key == NULL)
614 goto error;
615 if ((x = PyDict_Size(self->memo)) < 0)
616 goto error;
617 memo_id = PyLong_FromLong(x);
618 if (memo_id == NULL)
619 goto error;
620 tuple = PyTuple_New(2);
621 if (tuple == NULL)
622 goto error;
623
624 Py_INCREF(memo_id);
625 PyTuple_SET_ITEM(tuple, 0, memo_id);
626 Py_INCREF(obj);
627 PyTuple_SET_ITEM(tuple, 1, obj);
628 if (PyDict_SetItem(self->memo, key, tuple) < 0)
629 goto error;
630
631 if (!self->bin) {
632 pdata[0] = PUT;
633 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
634 len = strlen(pdata);
635 }
636 else {
637 if (x < 256) {
638 pdata[0] = BINPUT;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +0000639 pdata[1] = (unsigned char)x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000640 len = 2;
641 }
642 else if (x <= 0xffffffffL) {
643 pdata[0] = LONG_BINPUT;
644 pdata[1] = (unsigned char)(x & 0xff);
645 pdata[2] = (unsigned char)((x >> 8) & 0xff);
646 pdata[3] = (unsigned char)((x >> 16) & 0xff);
647 pdata[4] = (unsigned char)((x >> 24) & 0xff);
648 len = 5;
649 }
650 else { /* unlikely */
651 PyErr_SetString(PicklingError,
652 "memo id too large for LONG_BINPUT");
653 return -1;
654 }
655 }
656
657 if (pickler_write(self, pdata, len) < 0)
658 goto error;
659
660 if (0) {
661 error:
662 status = -1;
663 }
664
665 Py_XDECREF(key);
666 Py_XDECREF(memo_id);
667 Py_XDECREF(tuple);
668
669 return status;
670}
671
672static PyObject *
673whichmodule(PyObject *global, PyObject *global_name)
674{
675 Py_ssize_t i, j;
676 static PyObject *module_str = NULL;
677 static PyObject *main_str = NULL;
678 PyObject *module_name;
679 PyObject *modules_dict;
680 PyObject *module;
681 PyObject *obj;
682
683 if (module_str == NULL) {
684 module_str = PyUnicode_InternFromString("__module__");
685 if (module_str == NULL)
686 return NULL;
687 main_str = PyUnicode_InternFromString("__main__");
688 if (main_str == NULL)
689 return NULL;
690 }
691
692 module_name = PyObject_GetAttr(global, module_str);
693
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +0000694 /* In some rare cases (e.g., bound methods of extension types),
695 __module__ can be None. If it is so, then search sys.modules
696 for the module of global. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000697 if (module_name == Py_None) {
698 Py_DECREF(module_name);
699 goto search;
700 }
701
702 if (module_name) {
703 return module_name;
704 }
705 if (PyErr_ExceptionMatches(PyExc_AttributeError))
706 PyErr_Clear();
707 else
708 return NULL;
709
710 search:
711 modules_dict = PySys_GetObject("modules");
712 if (modules_dict == NULL)
713 return NULL;
714
715 i = 0;
716 module_name = NULL;
717 while ((j = PyDict_Next(modules_dict, &i, &module_name, &module))) {
Mark Dickinson211c6252009-02-01 10:28:51 +0000718 if (PyObject_RichCompareBool(module_name, main_str, Py_EQ) == 1)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000719 continue;
720
721 obj = PyObject_GetAttr(module, global_name);
722 if (obj == NULL) {
723 if (PyErr_ExceptionMatches(PyExc_AttributeError))
724 PyErr_Clear();
725 else
726 return NULL;
727 continue;
728 }
729
730 if (obj != global) {
731 Py_DECREF(obj);
732 continue;
733 }
734
735 Py_DECREF(obj);
736 break;
737 }
738
739 /* If no module is found, use __main__. */
740 if (!j) {
741 module_name = main_str;
742 }
743
744 Py_INCREF(module_name);
745 return module_name;
746}
747
748/* fast_save_enter() and fast_save_leave() are guards against recursive
749 objects when Pickler is used with the "fast mode" (i.e., with object
750 memoization disabled). If the nesting of a list or dict object exceed
751 FAST_NESTING_LIMIT, these guards will start keeping an internal
752 reference to the seen list or dict objects and check whether these objects
753 are recursive. These are not strictly necessary, since save() has a
754 hard-coded recursion limit, but they give a nicer error message than the
755 typical RuntimeError. */
756static int
757fast_save_enter(PicklerObject *self, PyObject *obj)
758{
759 /* if fast_nesting < 0, we're doing an error exit. */
760 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
761 PyObject *key = NULL;
762 if (self->fast_memo == NULL) {
763 self->fast_memo = PyDict_New();
764 if (self->fast_memo == NULL) {
765 self->fast_nesting = -1;
766 return 0;
767 }
768 }
769 key = PyLong_FromVoidPtr(obj);
770 if (key == NULL)
771 return 0;
772 if (PyDict_GetItem(self->fast_memo, key)) {
773 Py_DECREF(key);
774 PyErr_Format(PyExc_ValueError,
775 "fast mode: can't pickle cyclic objects "
776 "including object type %.200s at %p",
777 obj->ob_type->tp_name, obj);
778 self->fast_nesting = -1;
779 return 0;
780 }
781 if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
782 Py_DECREF(key);
783 self->fast_nesting = -1;
784 return 0;
785 }
786 Py_DECREF(key);
787 }
788 return 1;
789}
790
791static int
792fast_save_leave(PicklerObject *self, PyObject *obj)
793{
794 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
795 PyObject *key = PyLong_FromVoidPtr(obj);
796 if (key == NULL)
797 return 0;
798 if (PyDict_DelItem(self->fast_memo, key) < 0) {
799 Py_DECREF(key);
800 return 0;
801 }
802 Py_DECREF(key);
803 }
804 return 1;
805}
806
807static int
808save_none(PicklerObject *self, PyObject *obj)
809{
810 const char none_op = NONE;
811 if (pickler_write(self, &none_op, 1) < 0)
812 return -1;
813
814 return 0;
815}
816
817static int
818save_bool(PicklerObject *self, PyObject *obj)
819{
820 static const char *buf[2] = { FALSE, TRUE };
821 const char len[2] = {sizeof(FALSE) - 1, sizeof(TRUE) - 1};
822 int p = (obj == Py_True);
823
824 if (self->proto >= 2) {
825 const char bool_op = p ? NEWTRUE : NEWFALSE;
826 if (pickler_write(self, &bool_op, 1) < 0)
827 return -1;
828 }
829 else if (pickler_write(self, buf[p], len[p]) < 0)
830 return -1;
831
832 return 0;
833}
834
835static int
836save_int(PicklerObject *self, long x)
837{
838 char pdata[32];
839 int len = 0;
840
841 if (!self->bin
842#if SIZEOF_LONG > 4
843 || x > 0x7fffffffL || x < -0x80000000L
844#endif
845 ) {
846 /* Text-mode pickle, or long too big to fit in the 4-byte
847 * signed BININT format: store as a string.
848 */
Mark Dickinson8dd05142009-01-20 20:43:58 +0000849 pdata[0] = LONG; /* use LONG for consistency with pickle.py */
850 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ldL\n", x);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000851 if (pickler_write(self, pdata, strlen(pdata)) < 0)
852 return -1;
853 }
854 else {
855 /* Binary pickle and x fits in a signed 4-byte int. */
856 pdata[1] = (unsigned char)(x & 0xff);
857 pdata[2] = (unsigned char)((x >> 8) & 0xff);
858 pdata[3] = (unsigned char)((x >> 16) & 0xff);
859 pdata[4] = (unsigned char)((x >> 24) & 0xff);
860
861 if ((pdata[4] == 0) && (pdata[3] == 0)) {
862 if (pdata[2] == 0) {
863 pdata[0] = BININT1;
864 len = 2;
865 }
866 else {
867 pdata[0] = BININT2;
868 len = 3;
869 }
870 }
871 else {
872 pdata[0] = BININT;
873 len = 5;
874 }
875
876 if (pickler_write(self, pdata, len) < 0)
877 return -1;
878 }
879
880 return 0;
881}
882
883static int
884save_long(PicklerObject *self, PyObject *obj)
885{
886 PyObject *repr = NULL;
887 Py_ssize_t size;
888 long val = PyLong_AsLong(obj);
889 int status = 0;
890
891 const char long_op = LONG;
892
893 if (val == -1 && PyErr_Occurred()) {
894 /* out of range for int pickling */
895 PyErr_Clear();
896 }
897 else
898 return save_int(self, val);
899
900 if (self->proto >= 2) {
901 /* Linear-time pickling. */
902 size_t nbits;
903 size_t nbytes;
904 unsigned char *pdata;
905 char header[5];
906 int i;
907 int sign = _PyLong_Sign(obj);
908
909 if (sign == 0) {
910 header[0] = LONG1;
911 header[1] = 0; /* It's 0 -- an empty bytestring. */
912 if (pickler_write(self, header, 2) < 0)
913 goto error;
914 return 0;
915 }
916 nbits = _PyLong_NumBits(obj);
917 if (nbits == (size_t)-1 && PyErr_Occurred())
918 goto error;
919 /* How many bytes do we need? There are nbits >> 3 full
920 * bytes of data, and nbits & 7 leftover bits. If there
921 * are any leftover bits, then we clearly need another
922 * byte. Wnat's not so obvious is that we *probably*
923 * need another byte even if there aren't any leftovers:
924 * the most-significant bit of the most-significant byte
925 * acts like a sign bit, and it's usually got a sense
926 * opposite of the one we need. The exception is longs
927 * of the form -(2**(8*j-1)) for j > 0. Such a long is
928 * its own 256's-complement, so has the right sign bit
929 * even without the extra byte. That's a pain to check
930 * for in advance, though, so we always grab an extra
931 * byte at the start, and cut it back later if possible.
932 */
933 nbytes = (nbits >> 3) + 1;
934 if (nbytes > INT_MAX) {
935 PyErr_SetString(PyExc_OverflowError,
936 "long too large to pickle");
937 goto error;
938 }
Neal Norwitz6ae2eb22008-08-24 23:50:08 +0000939 repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000940 if (repr == NULL)
941 goto error;
Neal Norwitz6ae2eb22008-08-24 23:50:08 +0000942 pdata = (unsigned char *)PyBytes_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000943 i = _PyLong_AsByteArray((PyLongObject *)obj,
944 pdata, nbytes,
945 1 /* little endian */ , 1 /* signed */ );
946 if (i < 0)
947 goto error;
948 /* If the long is negative, this may be a byte more than
949 * needed. This is so iff the MSB is all redundant sign
950 * bits.
951 */
952 if (sign < 0 &&
953 nbytes > 1 &&
954 pdata[nbytes - 1] == 0xff &&
955 (pdata[nbytes - 2] & 0x80) != 0) {
956 nbytes--;
957 }
958
959 if (nbytes < 256) {
960 header[0] = LONG1;
961 header[1] = (unsigned char)nbytes;
962 size = 2;
963 }
964 else {
965 header[0] = LONG4;
966 size = (int)nbytes;
967 for (i = 1; i < 5; i++) {
968 header[i] = (unsigned char)(size & 0xff);
969 size >>= 8;
970 }
971 size = 5;
972 }
973 if (pickler_write(self, header, size) < 0 ||
974 pickler_write(self, (char *)pdata, (int)nbytes) < 0)
975 goto error;
976 }
977 else {
978 char *string;
979
Mark Dickinson8dd05142009-01-20 20:43:58 +0000980 /* proto < 2: write the repr and newline. This is quadratic-time (in
981 the number of digits), in both directions. We add a trailing 'L'
982 to the repr, for compatibility with Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000983
984 repr = PyObject_Repr(obj);
985 if (repr == NULL)
986 goto error;
987
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +0000988 string = _PyUnicode_AsStringAndSize(repr, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000989 if (string == NULL)
990 goto error;
991
992 if (pickler_write(self, &long_op, 1) < 0 ||
993 pickler_write(self, string, size) < 0 ||
Mark Dickinson8dd05142009-01-20 20:43:58 +0000994 pickler_write(self, "L\n", 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000995 goto error;
996 }
997
998 if (0) {
999 error:
1000 status = -1;
1001 }
1002 Py_XDECREF(repr);
1003
1004 return status;
1005}
1006
1007static int
1008save_float(PicklerObject *self, PyObject *obj)
1009{
1010 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
1011
1012 if (self->bin) {
1013 char pdata[9];
1014 pdata[0] = BINFLOAT;
1015 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
1016 return -1;
1017 if (pickler_write(self, pdata, 9) < 0)
1018 return -1;
Eric Smith0923d1d2009-04-16 20:16:10 +00001019 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001020 else {
Eric Smith0923d1d2009-04-16 20:16:10 +00001021 int result = -1;
1022 char *buf = NULL;
1023 char op = FLOAT;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001024
Eric Smith0923d1d2009-04-16 20:16:10 +00001025 if (pickler_write(self, &op, 1) < 0)
1026 goto done;
1027
Mark Dickinson3e09f432009-04-17 08:41:23 +00001028 buf = PyOS_double_to_string(x, 'g', 17, 0, NULL);
Eric Smith0923d1d2009-04-16 20:16:10 +00001029 if (!buf) {
1030 PyErr_NoMemory();
1031 goto done;
1032 }
1033
1034 if (pickler_write(self, buf, strlen(buf)) < 0)
1035 goto done;
1036
1037 if (pickler_write(self, "\n", 1) < 0)
1038 goto done;
1039
1040 result = 0;
1041done:
1042 PyMem_Free(buf);
1043 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001044 }
1045
1046 return 0;
1047}
1048
1049static int
1050save_bytes(PicklerObject *self, PyObject *obj)
1051{
1052 if (self->proto < 3) {
1053 /* Older pickle protocols do not have an opcode for pickling bytes
1054 objects. Therefore, we need to fake the copy protocol (i.e.,
1055 the __reduce__ method) to permit bytes object unpickling. */
1056 PyObject *reduce_value = NULL;
1057 PyObject *bytelist = NULL;
1058 int status;
1059
1060 bytelist = PySequence_List(obj);
1061 if (bytelist == NULL)
1062 return -1;
1063
1064 reduce_value = Py_BuildValue("(O(O))", (PyObject *)&PyBytes_Type,
1065 bytelist);
1066 if (reduce_value == NULL) {
1067 Py_DECREF(bytelist);
1068 return -1;
1069 }
1070
1071 /* save_reduce() will memoize the object automatically. */
1072 status = save_reduce(self, reduce_value, obj);
1073 Py_DECREF(reduce_value);
1074 Py_DECREF(bytelist);
1075 return status;
1076 }
1077 else {
1078 Py_ssize_t size;
1079 char header[5];
1080 int len;
1081
1082 size = PyBytes_Size(obj);
1083 if (size < 0)
1084 return -1;
1085
1086 if (size < 256) {
1087 header[0] = SHORT_BINBYTES;
1088 header[1] = (unsigned char)size;
1089 len = 2;
1090 }
1091 else if (size <= 0xffffffffL) {
1092 header[0] = BINBYTES;
1093 header[1] = (unsigned char)(size & 0xff);
1094 header[2] = (unsigned char)((size >> 8) & 0xff);
1095 header[3] = (unsigned char)((size >> 16) & 0xff);
1096 header[4] = (unsigned char)((size >> 24) & 0xff);
1097 len = 5;
1098 }
1099 else {
1100 return -1; /* string too large */
1101 }
1102
1103 if (pickler_write(self, header, len) < 0)
1104 return -1;
1105
1106 if (pickler_write(self, PyBytes_AS_STRING(obj), size) < 0)
1107 return -1;
1108
1109 if (memo_put(self, obj) < 0)
1110 return -1;
1111
1112 return 0;
1113 }
1114}
1115
1116/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
1117 backslash and newline characters to \uXXXX escapes. */
1118static PyObject *
1119raw_unicode_escape(const Py_UNICODE *s, Py_ssize_t size)
1120{
1121 PyObject *repr, *result;
1122 char *p;
1123 char *q;
1124
1125 static const char *hexdigits = "0123456789abcdef";
1126
1127#ifdef Py_UNICODE_WIDE
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001128 const Py_ssize_t expandsize = 10;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001129#else
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001130 const Py_ssize_t expandsize = 6;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001131#endif
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001132
1133 if (size > PY_SSIZE_T_MAX / expandsize)
1134 return PyErr_NoMemory();
1135
1136 repr = PyByteArray_FromStringAndSize(NULL, expandsize * size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001137 if (repr == NULL)
1138 return NULL;
1139 if (size == 0)
1140 goto done;
1141
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001142 p = q = PyByteArray_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001143 while (size-- > 0) {
1144 Py_UNICODE ch = *s++;
1145#ifdef Py_UNICODE_WIDE
1146 /* Map 32-bit characters to '\Uxxxxxxxx' */
1147 if (ch >= 0x10000) {
1148 *p++ = '\\';
1149 *p++ = 'U';
1150 *p++ = hexdigits[(ch >> 28) & 0xf];
1151 *p++ = hexdigits[(ch >> 24) & 0xf];
1152 *p++ = hexdigits[(ch >> 20) & 0xf];
1153 *p++ = hexdigits[(ch >> 16) & 0xf];
1154 *p++ = hexdigits[(ch >> 12) & 0xf];
1155 *p++ = hexdigits[(ch >> 8) & 0xf];
1156 *p++ = hexdigits[(ch >> 4) & 0xf];
1157 *p++ = hexdigits[ch & 15];
1158 }
1159 else
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001160#else
1161 /* Map UTF-16 surrogate pairs to '\U00xxxxxx' */
1162 if (ch >= 0xD800 && ch < 0xDC00) {
1163 Py_UNICODE ch2;
1164 Py_UCS4 ucs;
1165
1166 ch2 = *s++;
1167 size--;
1168 if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
1169 ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
1170 *p++ = '\\';
1171 *p++ = 'U';
1172 *p++ = hexdigits[(ucs >> 28) & 0xf];
1173 *p++ = hexdigits[(ucs >> 24) & 0xf];
1174 *p++ = hexdigits[(ucs >> 20) & 0xf];
1175 *p++ = hexdigits[(ucs >> 16) & 0xf];
1176 *p++ = hexdigits[(ucs >> 12) & 0xf];
1177 *p++ = hexdigits[(ucs >> 8) & 0xf];
1178 *p++ = hexdigits[(ucs >> 4) & 0xf];
1179 *p++ = hexdigits[ucs & 0xf];
1180 continue;
1181 }
1182 /* Fall through: isolated surrogates are copied as-is */
1183 s--;
1184 size++;
1185 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001186#endif
1187 /* Map 16-bit characters to '\uxxxx' */
1188 if (ch >= 256 || ch == '\\' || ch == '\n') {
1189 *p++ = '\\';
1190 *p++ = 'u';
1191 *p++ = hexdigits[(ch >> 12) & 0xf];
1192 *p++ = hexdigits[(ch >> 8) & 0xf];
1193 *p++ = hexdigits[(ch >> 4) & 0xf];
1194 *p++ = hexdigits[ch & 15];
1195 }
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001196 /* Copy everything else as-is */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001197 else
1198 *p++ = (char) ch;
1199 }
1200 size = p - q;
1201
1202 done:
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001203 result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(repr), size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001204 Py_DECREF(repr);
1205 return result;
1206}
1207
1208static int
1209save_unicode(PicklerObject *self, PyObject *obj)
1210{
1211 Py_ssize_t size;
1212 PyObject *encoded = NULL;
1213
1214 if (self->bin) {
1215 char pdata[5];
1216
1217 encoded = PyUnicode_AsUTF8String(obj);
1218 if (encoded == NULL)
1219 goto error;
1220
1221 size = PyBytes_GET_SIZE(encoded);
1222 if (size < 0 || size > 0xffffffffL)
1223 goto error; /* string too large */
1224
1225 pdata[0] = BINUNICODE;
1226 pdata[1] = (unsigned char)(size & 0xff);
1227 pdata[2] = (unsigned char)((size >> 8) & 0xff);
1228 pdata[3] = (unsigned char)((size >> 16) & 0xff);
1229 pdata[4] = (unsigned char)((size >> 24) & 0xff);
1230
1231 if (pickler_write(self, pdata, 5) < 0)
1232 goto error;
1233
1234 if (pickler_write(self, PyBytes_AS_STRING(encoded), size) < 0)
1235 goto error;
1236 }
1237 else {
1238 const char unicode_op = UNICODE;
1239
1240 encoded = raw_unicode_escape(PyUnicode_AS_UNICODE(obj),
1241 PyUnicode_GET_SIZE(obj));
1242 if (encoded == NULL)
1243 goto error;
1244
1245 if (pickler_write(self, &unicode_op, 1) < 0)
1246 goto error;
1247
1248 size = PyBytes_GET_SIZE(encoded);
1249 if (pickler_write(self, PyBytes_AS_STRING(encoded), size) < 0)
1250 goto error;
1251
1252 if (pickler_write(self, "\n", 1) < 0)
1253 goto error;
1254 }
1255 if (memo_put(self, obj) < 0)
1256 goto error;
1257
1258 Py_DECREF(encoded);
1259 return 0;
1260
1261 error:
1262 Py_XDECREF(encoded);
1263 return -1;
1264}
1265
1266/* A helper for save_tuple. Push the len elements in tuple t on the stack. */
1267static int
1268store_tuple_elements(PicklerObject *self, PyObject *t, int len)
1269{
1270 int i;
1271
1272 assert(PyTuple_Size(t) == len);
1273
1274 for (i = 0; i < len; i++) {
1275 PyObject *element = PyTuple_GET_ITEM(t, i);
1276
1277 if (element == NULL)
1278 return -1;
1279 if (save(self, element, 0) < 0)
1280 return -1;
1281 }
1282
1283 return 0;
1284}
1285
1286/* Tuples are ubiquitous in the pickle protocols, so many techniques are
1287 * used across protocols to minimize the space needed to pickle them.
1288 * Tuples are also the only builtin immutable type that can be recursive
1289 * (a tuple can be reached from itself), and that requires some subtle
1290 * magic so that it works in all cases. IOW, this is a long routine.
1291 */
1292static int
1293save_tuple(PicklerObject *self, PyObject *obj)
1294{
1295 PyObject *memo_key = NULL;
1296 int len, i;
1297 int status = 0;
1298
1299 const char mark_op = MARK;
1300 const char tuple_op = TUPLE;
1301 const char pop_op = POP;
1302 const char pop_mark_op = POP_MARK;
1303 const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
1304
1305 if ((len = PyTuple_Size(obj)) < 0)
1306 return -1;
1307
1308 if (len == 0) {
1309 char pdata[2];
1310
1311 if (self->proto) {
1312 pdata[0] = EMPTY_TUPLE;
1313 len = 1;
1314 }
1315 else {
1316 pdata[0] = MARK;
1317 pdata[1] = TUPLE;
1318 len = 2;
1319 }
1320 if (pickler_write(self, pdata, len) < 0)
1321 return -1;
1322 return 0;
1323 }
1324
1325 /* id(tuple) isn't in the memo now. If it shows up there after
1326 * saving the tuple elements, the tuple must be recursive, in
1327 * which case we'll pop everything we put on the stack, and fetch
1328 * its value from the memo.
1329 */
1330 memo_key = PyLong_FromVoidPtr(obj);
1331 if (memo_key == NULL)
1332 return -1;
1333
1334 if (len <= 3 && self->proto >= 2) {
1335 /* Use TUPLE{1,2,3} opcodes. */
1336 if (store_tuple_elements(self, obj, len) < 0)
1337 goto error;
1338
1339 if (PyDict_GetItem(self->memo, memo_key)) {
1340 /* pop the len elements */
1341 for (i = 0; i < len; i++)
1342 if (pickler_write(self, &pop_op, 1) < 0)
1343 goto error;
1344 /* fetch from memo */
1345 if (memo_get(self, memo_key) < 0)
1346 goto error;
1347
1348 Py_DECREF(memo_key);
1349 return 0;
1350 }
1351 else { /* Not recursive. */
1352 if (pickler_write(self, len2opcode + len, 1) < 0)
1353 goto error;
1354 }
1355 goto memoize;
1356 }
1357
1358 /* proto < 2 and len > 0, or proto >= 2 and len > 3.
1359 * Generate MARK e1 e2 ... TUPLE
1360 */
1361 if (pickler_write(self, &mark_op, 1) < 0)
1362 goto error;
1363
1364 if (store_tuple_elements(self, obj, len) < 0)
1365 goto error;
1366
1367 if (PyDict_GetItem(self->memo, memo_key)) {
1368 /* pop the stack stuff we pushed */
1369 if (self->bin) {
1370 if (pickler_write(self, &pop_mark_op, 1) < 0)
1371 goto error;
1372 }
1373 else {
1374 /* Note that we pop one more than len, to remove
1375 * the MARK too.
1376 */
1377 for (i = 0; i <= len; i++)
1378 if (pickler_write(self, &pop_op, 1) < 0)
1379 goto error;
1380 }
1381 /* fetch from memo */
1382 if (memo_get(self, memo_key) < 0)
1383 goto error;
1384
1385 Py_DECREF(memo_key);
1386 return 0;
1387 }
1388 else { /* Not recursive. */
1389 if (pickler_write(self, &tuple_op, 1) < 0)
1390 goto error;
1391 }
1392
1393 memoize:
1394 if (memo_put(self, obj) < 0)
1395 goto error;
1396
1397 if (0) {
1398 error:
1399 status = -1;
1400 }
1401
1402 Py_DECREF(memo_key);
1403 return status;
1404}
1405
1406/* iter is an iterator giving items, and we batch up chunks of
1407 * MARK item item ... item APPENDS
1408 * opcode sequences. Calling code should have arranged to first create an
1409 * empty list, or list-like object, for the APPENDS to operate on.
1410 * Returns 0 on success, <0 on error.
1411 */
1412static int
1413batch_list(PicklerObject *self, PyObject *iter)
1414{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001415 PyObject *obj = NULL;
1416 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001417 int i, n;
1418
1419 const char mark_op = MARK;
1420 const char append_op = APPEND;
1421 const char appends_op = APPENDS;
1422
1423 assert(iter != NULL);
1424
1425 /* XXX: I think this function could be made faster by avoiding the
1426 iterator interface and fetching objects directly from list using
1427 PyList_GET_ITEM.
1428 */
1429
1430 if (self->proto == 0) {
1431 /* APPENDS isn't available; do one at a time. */
1432 for (;;) {
1433 obj = PyIter_Next(iter);
1434 if (obj == NULL) {
1435 if (PyErr_Occurred())
1436 return -1;
1437 break;
1438 }
1439 i = save(self, obj, 0);
1440 Py_DECREF(obj);
1441 if (i < 0)
1442 return -1;
1443 if (pickler_write(self, &append_op, 1) < 0)
1444 return -1;
1445 }
1446 return 0;
1447 }
1448
1449 /* proto > 0: write in batches of BATCHSIZE. */
1450 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001451 /* Get first item */
1452 firstitem = PyIter_Next(iter);
1453 if (firstitem == NULL) {
1454 if (PyErr_Occurred())
1455 goto error;
1456
1457 /* nothing more to add */
1458 break;
1459 }
1460
1461 /* Try to get a second item */
1462 obj = PyIter_Next(iter);
1463 if (obj == NULL) {
1464 if (PyErr_Occurred())
1465 goto error;
1466
1467 /* Only one item to write */
1468 if (save(self, firstitem, 0) < 0)
1469 goto error;
1470 if (pickler_write(self, &append_op, 1) < 0)
1471 goto error;
1472 Py_CLEAR(firstitem);
1473 break;
1474 }
1475
1476 /* More than one item to write */
1477
1478 /* Pump out MARK, items, APPENDS. */
1479 if (pickler_write(self, &mark_op, 1) < 0)
1480 goto error;
1481
1482 if (save(self, firstitem, 0) < 0)
1483 goto error;
1484 Py_CLEAR(firstitem);
1485 n = 1;
1486
1487 /* Fetch and save up to BATCHSIZE items */
1488 while (obj) {
1489 if (save(self, obj, 0) < 0)
1490 goto error;
1491 Py_CLEAR(obj);
1492 n += 1;
1493
1494 if (n == BATCHSIZE)
1495 break;
1496
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001497 obj = PyIter_Next(iter);
1498 if (obj == NULL) {
1499 if (PyErr_Occurred())
1500 goto error;
1501 break;
1502 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001503 }
1504
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001505 if (pickler_write(self, &appends_op, 1) < 0)
1506 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001507
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001508 } while (n == BATCHSIZE);
1509 return 0;
1510
1511 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001512 Py_XDECREF(firstitem);
1513 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001514 return -1;
1515}
1516
1517static int
1518save_list(PicklerObject *self, PyObject *obj)
1519{
1520 PyObject *iter;
1521 char header[3];
1522 int len;
1523 int status = 0;
1524
1525 if (self->fast && !fast_save_enter(self, obj))
1526 goto error;
1527
1528 /* Create an empty list. */
1529 if (self->bin) {
1530 header[0] = EMPTY_LIST;
1531 len = 1;
1532 }
1533 else {
1534 header[0] = MARK;
1535 header[1] = LIST;
1536 len = 2;
1537 }
1538
1539 if (pickler_write(self, header, len) < 0)
1540 goto error;
1541
1542 /* Get list length, and bow out early if empty. */
1543 if ((len = PyList_Size(obj)) < 0)
1544 goto error;
1545
1546 if (memo_put(self, obj) < 0)
1547 goto error;
1548
1549 if (len != 0) {
1550 /* Save the list elements. */
1551 iter = PyObject_GetIter(obj);
1552 if (iter == NULL)
1553 goto error;
1554 status = batch_list(self, iter);
1555 Py_DECREF(iter);
1556 }
1557
1558 if (0) {
1559 error:
1560 status = -1;
1561 }
1562
1563 if (self->fast && !fast_save_leave(self, obj))
1564 status = -1;
1565
1566 return status;
1567}
1568
1569/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
1570 * MARK key value ... key value SETITEMS
1571 * opcode sequences. Calling code should have arranged to first create an
1572 * empty dict, or dict-like object, for the SETITEMS to operate on.
1573 * Returns 0 on success, <0 on error.
1574 *
1575 * This is very much like batch_list(). The difference between saving
1576 * elements directly, and picking apart two-tuples, is so long-winded at
1577 * the C level, though, that attempts to combine these routines were too
1578 * ugly to bear.
1579 */
1580static int
1581batch_dict(PicklerObject *self, PyObject *iter)
1582{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001583 PyObject *obj = NULL;
1584 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001585 int i, n;
1586
1587 const char mark_op = MARK;
1588 const char setitem_op = SETITEM;
1589 const char setitems_op = SETITEMS;
1590
1591 assert(iter != NULL);
1592
1593 if (self->proto == 0) {
1594 /* SETITEMS isn't available; do one at a time. */
1595 for (;;) {
1596 obj = PyIter_Next(iter);
1597 if (obj == NULL) {
1598 if (PyErr_Occurred())
1599 return -1;
1600 break;
1601 }
1602 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
1603 PyErr_SetString(PyExc_TypeError, "dict items "
1604 "iterator must return 2-tuples");
1605 return -1;
1606 }
1607 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
1608 if (i >= 0)
1609 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
1610 Py_DECREF(obj);
1611 if (i < 0)
1612 return -1;
1613 if (pickler_write(self, &setitem_op, 1) < 0)
1614 return -1;
1615 }
1616 return 0;
1617 }
1618
1619 /* proto > 0: write in batches of BATCHSIZE. */
1620 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001621 /* Get first item */
1622 firstitem = PyIter_Next(iter);
1623 if (firstitem == NULL) {
1624 if (PyErr_Occurred())
1625 goto error;
1626
1627 /* nothing more to add */
1628 break;
1629 }
1630 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
1631 PyErr_SetString(PyExc_TypeError, "dict items "
1632 "iterator must return 2-tuples");
1633 goto error;
1634 }
1635
1636 /* Try to get a second item */
1637 obj = PyIter_Next(iter);
1638 if (obj == NULL) {
1639 if (PyErr_Occurred())
1640 goto error;
1641
1642 /* Only one item to write */
1643 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
1644 goto error;
1645 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
1646 goto error;
1647 if (pickler_write(self, &setitem_op, 1) < 0)
1648 goto error;
1649 Py_CLEAR(firstitem);
1650 break;
1651 }
1652
1653 /* More than one item to write */
1654
1655 /* Pump out MARK, items, SETITEMS. */
1656 if (pickler_write(self, &mark_op, 1) < 0)
1657 goto error;
1658
1659 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
1660 goto error;
1661 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
1662 goto error;
1663 Py_CLEAR(firstitem);
1664 n = 1;
1665
1666 /* Fetch and save up to BATCHSIZE items */
1667 while (obj) {
1668 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
1669 PyErr_SetString(PyExc_TypeError, "dict items "
1670 "iterator must return 2-tuples");
1671 goto error;
1672 }
1673 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
1674 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
1675 goto error;
1676 Py_CLEAR(obj);
1677 n += 1;
1678
1679 if (n == BATCHSIZE)
1680 break;
1681
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001682 obj = PyIter_Next(iter);
1683 if (obj == NULL) {
1684 if (PyErr_Occurred())
1685 goto error;
1686 break;
1687 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001688 }
1689
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001690 if (pickler_write(self, &setitems_op, 1) < 0)
1691 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001692
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001693 } while (n == BATCHSIZE);
1694 return 0;
1695
1696 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001697 Py_XDECREF(firstitem);
1698 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001699 return -1;
1700}
1701
1702static int
1703save_dict(PicklerObject *self, PyObject *obj)
1704{
1705 PyObject *items, *iter;
1706 char header[3];
1707 int len;
1708 int status = 0;
1709
1710 if (self->fast && !fast_save_enter(self, obj))
1711 goto error;
1712
1713 /* Create an empty dict. */
1714 if (self->bin) {
1715 header[0] = EMPTY_DICT;
1716 len = 1;
1717 }
1718 else {
1719 header[0] = MARK;
1720 header[1] = DICT;
1721 len = 2;
1722 }
1723
1724 if (pickler_write(self, header, len) < 0)
1725 goto error;
1726
1727 /* Get dict size, and bow out early if empty. */
1728 if ((len = PyDict_Size(obj)) < 0)
1729 goto error;
1730
1731 if (memo_put(self, obj) < 0)
1732 goto error;
1733
1734 if (len != 0) {
1735 /* Save the dict items. */
1736 items = PyObject_CallMethod(obj, "items", "()");
1737 if (items == NULL)
1738 goto error;
1739 iter = PyObject_GetIter(items);
1740 Py_DECREF(items);
1741 if (iter == NULL)
1742 goto error;
1743 status = batch_dict(self, iter);
1744 Py_DECREF(iter);
1745 }
1746
1747 if (0) {
1748 error:
1749 status = -1;
1750 }
1751
1752 if (self->fast && !fast_save_leave(self, obj))
1753 status = -1;
1754
1755 return status;
1756}
1757
1758static int
1759save_global(PicklerObject *self, PyObject *obj, PyObject *name)
1760{
1761 static PyObject *name_str = NULL;
1762 PyObject *global_name = NULL;
1763 PyObject *module_name = NULL;
1764 PyObject *module = NULL;
1765 PyObject *cls;
1766 int status = 0;
1767
1768 const char global_op = GLOBAL;
1769
1770 if (name_str == NULL) {
1771 name_str = PyUnicode_InternFromString("__name__");
1772 if (name_str == NULL)
1773 goto error;
1774 }
1775
1776 if (name) {
1777 global_name = name;
1778 Py_INCREF(global_name);
1779 }
1780 else {
1781 global_name = PyObject_GetAttr(obj, name_str);
1782 if (global_name == NULL)
1783 goto error;
1784 }
1785
1786 module_name = whichmodule(obj, global_name);
1787 if (module_name == NULL)
1788 goto error;
1789
1790 /* XXX: Change to use the import C API directly with level=0 to disallow
1791 relative imports.
1792
1793 XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
1794 builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
1795 custom import functions (IMHO, this would be a nice security
1796 feature). The import C API would need to be extended to support the
1797 extra parameters of __import__ to fix that. */
1798 module = PyImport_Import(module_name);
1799 if (module == NULL) {
1800 PyErr_Format(PicklingError,
1801 "Can't pickle %R: import of module %R failed",
1802 obj, module_name);
1803 goto error;
1804 }
1805 cls = PyObject_GetAttr(module, global_name);
1806 if (cls == NULL) {
1807 PyErr_Format(PicklingError,
1808 "Can't pickle %R: attribute lookup %S.%S failed",
1809 obj, module_name, global_name);
1810 goto error;
1811 }
1812 if (cls != obj) {
1813 Py_DECREF(cls);
1814 PyErr_Format(PicklingError,
1815 "Can't pickle %R: it's not the same object as %S.%S",
1816 obj, module_name, global_name);
1817 goto error;
1818 }
1819 Py_DECREF(cls);
1820
1821 if (self->proto >= 2) {
1822 /* See whether this is in the extension registry, and if
1823 * so generate an EXT opcode.
1824 */
1825 PyObject *code_obj; /* extension code as Python object */
1826 long code; /* extension code as C value */
1827 char pdata[5];
1828 int n;
1829
1830 PyTuple_SET_ITEM(two_tuple, 0, module_name);
1831 PyTuple_SET_ITEM(two_tuple, 1, global_name);
1832 code_obj = PyDict_GetItem(extension_registry, two_tuple);
1833 /* The object is not registered in the extension registry.
1834 This is the most likely code path. */
1835 if (code_obj == NULL)
1836 goto gen_global;
1837
1838 /* XXX: pickle.py doesn't check neither the type, nor the range
1839 of the value returned by the extension_registry. It should for
1840 consistency. */
1841
1842 /* Verify code_obj has the right type and value. */
1843 if (!PyLong_Check(code_obj)) {
1844 PyErr_Format(PicklingError,
1845 "Can't pickle %R: extension code %R isn't an integer",
1846 obj, code_obj);
1847 goto error;
1848 }
1849 code = PyLong_AS_LONG(code_obj);
1850 if (code <= 0 || code > 0x7fffffffL) {
1851 PyErr_Format(PicklingError,
1852 "Can't pickle %R: extension code %ld is out of range",
1853 obj, code);
1854 goto error;
1855 }
1856
1857 /* Generate an EXT opcode. */
1858 if (code <= 0xff) {
1859 pdata[0] = EXT1;
1860 pdata[1] = (unsigned char)code;
1861 n = 2;
1862 }
1863 else if (code <= 0xffff) {
1864 pdata[0] = EXT2;
1865 pdata[1] = (unsigned char)(code & 0xff);
1866 pdata[2] = (unsigned char)((code >> 8) & 0xff);
1867 n = 3;
1868 }
1869 else {
1870 pdata[0] = EXT4;
1871 pdata[1] = (unsigned char)(code & 0xff);
1872 pdata[2] = (unsigned char)((code >> 8) & 0xff);
1873 pdata[3] = (unsigned char)((code >> 16) & 0xff);
1874 pdata[4] = (unsigned char)((code >> 24) & 0xff);
1875 n = 5;
1876 }
1877
1878 if (pickler_write(self, pdata, n) < 0)
1879 goto error;
1880 }
1881 else {
1882 /* Generate a normal global opcode if we are using a pickle
1883 protocol <= 2, or if the object is not registered in the
1884 extension registry. */
1885 PyObject *encoded;
1886 PyObject *(*unicode_encoder)(PyObject *);
1887
1888 gen_global:
1889 if (pickler_write(self, &global_op, 1) < 0)
1890 goto error;
1891
1892 /* Since Python 3.0 now supports non-ASCII identifiers, we encode both
1893 the module name and the global name using UTF-8. We do so only when
1894 we are using the pickle protocol newer than version 3. This is to
1895 ensure compatibility with older Unpickler running on Python 2.x. */
1896 if (self->proto >= 3) {
1897 unicode_encoder = PyUnicode_AsUTF8String;
1898 }
1899 else {
1900 unicode_encoder = PyUnicode_AsASCIIString;
1901 }
1902
1903 /* Save the name of the module. */
1904 encoded = unicode_encoder(module_name);
1905 if (encoded == NULL) {
1906 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
1907 PyErr_Format(PicklingError,
1908 "can't pickle module identifier '%S' using "
1909 "pickle protocol %i", module_name, self->proto);
1910 goto error;
1911 }
1912 if (pickler_write(self, PyBytes_AS_STRING(encoded),
1913 PyBytes_GET_SIZE(encoded)) < 0) {
1914 Py_DECREF(encoded);
1915 goto error;
1916 }
1917 Py_DECREF(encoded);
1918 if(pickler_write(self, "\n", 1) < 0)
1919 goto error;
1920
1921 /* Save the name of the module. */
1922 encoded = unicode_encoder(global_name);
1923 if (encoded == NULL) {
1924 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
1925 PyErr_Format(PicklingError,
1926 "can't pickle global identifier '%S' using "
1927 "pickle protocol %i", global_name, self->proto);
1928 goto error;
1929 }
1930 if (pickler_write(self, PyBytes_AS_STRING(encoded),
1931 PyBytes_GET_SIZE(encoded)) < 0) {
1932 Py_DECREF(encoded);
1933 goto error;
1934 }
1935 Py_DECREF(encoded);
1936 if(pickler_write(self, "\n", 1) < 0)
1937 goto error;
1938
1939 /* Memoize the object. */
1940 if (memo_put(self, obj) < 0)
1941 goto error;
1942 }
1943
1944 if (0) {
1945 error:
1946 status = -1;
1947 }
1948 Py_XDECREF(module_name);
1949 Py_XDECREF(global_name);
1950 Py_XDECREF(module);
1951
1952 return status;
1953}
1954
1955static int
1956save_pers(PicklerObject *self, PyObject *obj, PyObject *func)
1957{
1958 PyObject *pid = NULL;
1959 int status = 0;
1960
1961 const char persid_op = PERSID;
1962 const char binpersid_op = BINPERSID;
1963
1964 Py_INCREF(obj);
1965 pid = pickler_call(self, func, obj);
1966 if (pid == NULL)
1967 return -1;
1968
1969 if (pid != Py_None) {
1970 if (self->bin) {
1971 if (save(self, pid, 1) < 0 ||
1972 pickler_write(self, &binpersid_op, 1) < 0)
1973 goto error;
1974 }
1975 else {
1976 PyObject *pid_str = NULL;
1977 char *pid_ascii_bytes;
1978 Py_ssize_t size;
1979
1980 pid_str = PyObject_Str(pid);
1981 if (pid_str == NULL)
1982 goto error;
1983
1984 /* XXX: Should it check whether the persistent id only contains
1985 ASCII characters? And what if the pid contains embedded
1986 newlines? */
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00001987 pid_ascii_bytes = _PyUnicode_AsStringAndSize(pid_str, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001988 Py_DECREF(pid_str);
1989 if (pid_ascii_bytes == NULL)
1990 goto error;
1991
1992 if (pickler_write(self, &persid_op, 1) < 0 ||
1993 pickler_write(self, pid_ascii_bytes, size) < 0 ||
1994 pickler_write(self, "\n", 1) < 0)
1995 goto error;
1996 }
1997 status = 1;
1998 }
1999
2000 if (0) {
2001 error:
2002 status = -1;
2003 }
2004 Py_XDECREF(pid);
2005
2006 return status;
2007}
2008
2009/* We're saving obj, and args is the 2-thru-5 tuple returned by the
2010 * appropriate __reduce__ method for obj.
2011 */
2012static int
2013save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
2014{
2015 PyObject *callable;
2016 PyObject *argtup;
2017 PyObject *state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002018 PyObject *listitems = Py_None;
2019 PyObject *dictitems = Py_None;
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002020 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002021
2022 int use_newobj = self->proto >= 2;
2023
2024 const char reduce_op = REDUCE;
2025 const char build_op = BUILD;
2026 const char newobj_op = NEWOBJ;
2027
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002028 size = PyTuple_Size(args);
2029 if (size < 2 || size > 5) {
2030 PyErr_SetString(PicklingError, "tuple returned by "
2031 "__reduce__ must contain 2 through 5 elements");
2032 return -1;
2033 }
2034
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002035 if (!PyArg_UnpackTuple(args, "save_reduce", 2, 5,
2036 &callable, &argtup, &state, &listitems, &dictitems))
2037 return -1;
2038
2039 if (!PyCallable_Check(callable)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002040 PyErr_SetString(PicklingError, "first item of the tuple "
2041 "returned by __reduce__ must be callable");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002042 return -1;
2043 }
2044 if (!PyTuple_Check(argtup)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002045 PyErr_SetString(PicklingError, "second item of the tuple "
2046 "returned by __reduce__ must be a tuple");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002047 return -1;
2048 }
2049
2050 if (state == Py_None)
2051 state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002052
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002053 if (listitems == Py_None)
2054 listitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002055 else if (!PyIter_Check(listitems)) {
2056 PyErr_Format(PicklingError, "Fourth element of tuple"
2057 "returned by __reduce__ must be an iterator, not %s",
2058 Py_TYPE(listitems)->tp_name);
2059 return -1;
2060 }
2061
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002062 if (dictitems == Py_None)
2063 dictitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002064 else if (!PyIter_Check(dictitems)) {
2065 PyErr_Format(PicklingError, "Fifth element of tuple"
2066 "returned by __reduce__ must be an iterator, not %s",
2067 Py_TYPE(dictitems)->tp_name);
2068 return -1;
2069 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002070
2071 /* Protocol 2 special case: if callable's name is __newobj__, use
2072 NEWOBJ. */
2073 if (use_newobj) {
2074 static PyObject *newobj_str = NULL;
2075 PyObject *name_str;
2076
2077 if (newobj_str == NULL) {
2078 newobj_str = PyUnicode_InternFromString("__newobj__");
2079 }
2080
2081 name_str = PyObject_GetAttrString(callable, "__name__");
2082 if (name_str == NULL) {
2083 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2084 PyErr_Clear();
2085 else
2086 return -1;
2087 use_newobj = 0;
2088 }
2089 else {
2090 use_newobj = PyUnicode_Check(name_str) &&
2091 PyUnicode_Compare(name_str, newobj_str) == 0;
2092 Py_DECREF(name_str);
2093 }
2094 }
2095 if (use_newobj) {
2096 PyObject *cls;
2097 PyObject *newargtup;
2098 PyObject *obj_class;
2099 int p;
2100
2101 /* Sanity checks. */
2102 if (Py_SIZE(argtup) < 1) {
2103 PyErr_SetString(PicklingError, "__newobj__ arglist is empty");
2104 return -1;
2105 }
2106
2107 cls = PyTuple_GET_ITEM(argtup, 0);
2108 if (!PyObject_HasAttrString(cls, "__new__")) {
2109 PyErr_SetString(PicklingError, "args[0] from "
2110 "__newobj__ args has no __new__");
2111 return -1;
2112 }
2113
2114 if (obj != NULL) {
2115 obj_class = PyObject_GetAttrString(obj, "__class__");
2116 if (obj_class == NULL) {
2117 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2118 PyErr_Clear();
2119 else
2120 return -1;
2121 }
2122 p = obj_class != cls; /* true iff a problem */
2123 Py_DECREF(obj_class);
2124 if (p) {
2125 PyErr_SetString(PicklingError, "args[0] from "
2126 "__newobj__ args has the wrong class");
2127 return -1;
2128 }
2129 }
2130 /* XXX: These calls save() are prone to infinite recursion. Imagine
2131 what happen if the value returned by the __reduce__() method of
2132 some extension type contains another object of the same type. Ouch!
2133
2134 Here is a quick example, that I ran into, to illustrate what I
2135 mean:
2136
2137 >>> import pickle, copyreg
2138 >>> copyreg.dispatch_table.pop(complex)
2139 >>> pickle.dumps(1+2j)
2140 Traceback (most recent call last):
2141 ...
2142 RuntimeError: maximum recursion depth exceeded
2143
2144 Removing the complex class from copyreg.dispatch_table made the
2145 __reduce_ex__() method emit another complex object:
2146
2147 >>> (1+1j).__reduce_ex__(2)
2148 (<function __newobj__ at 0xb7b71c3c>,
2149 (<class 'complex'>, (1+1j)), None, None, None)
2150
2151 Thus when save() was called on newargstup (the 2nd item) recursion
2152 ensued. Of course, the bug was in the complex class which had a
2153 broken __getnewargs__() that emitted another complex object. But,
2154 the point, here, is it is quite easy to end up with a broken reduce
2155 function. */
2156
2157 /* Save the class and its __new__ arguments. */
2158 if (save(self, cls, 0) < 0)
2159 return -1;
2160
2161 newargtup = PyTuple_GetSlice(argtup, 1, Py_SIZE(argtup));
2162 if (newargtup == NULL)
2163 return -1;
2164
2165 p = save(self, newargtup, 0);
2166 Py_DECREF(newargtup);
2167 if (p < 0)
2168 return -1;
2169
2170 /* Add NEWOBJ opcode. */
2171 if (pickler_write(self, &newobj_op, 1) < 0)
2172 return -1;
2173 }
2174 else { /* Not using NEWOBJ. */
2175 if (save(self, callable, 0) < 0 ||
2176 save(self, argtup, 0) < 0 ||
2177 pickler_write(self, &reduce_op, 1) < 0)
2178 return -1;
2179 }
2180
2181 /* obj can be NULL when save_reduce() is used directly. A NULL obj means
2182 the caller do not want to memoize the object. Not particularly useful,
2183 but that is to mimic the behavior save_reduce() in pickle.py when
2184 obj is None. */
2185 if (obj && memo_put(self, obj) < 0)
2186 return -1;
2187
2188 if (listitems && batch_list(self, listitems) < 0)
2189 return -1;
2190
2191 if (dictitems && batch_dict(self, dictitems) < 0)
2192 return -1;
2193
2194 if (state) {
2195 if (save(self, state, 0) < 0 ||
2196 pickler_write(self, &build_op, 1) < 0)
2197 return -1;
2198 }
2199
2200 return 0;
2201}
2202
2203static int
2204save(PicklerObject *self, PyObject *obj, int pers_save)
2205{
2206 PyTypeObject *type;
2207 PyObject *reduce_func = NULL;
2208 PyObject *reduce_value = NULL;
2209 PyObject *memo_key = NULL;
2210 int status = 0;
2211
Alexandre Vassalottidff18342008-07-13 18:48:30 +00002212 if (Py_EnterRecursiveCall(" while pickling an object") < 0)
2213 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002214
2215 /* The extra pers_save argument is necessary to avoid calling save_pers()
2216 on its returned object. */
2217 if (!pers_save && self->pers_func) {
2218 /* save_pers() returns:
2219 -1 to signal an error;
2220 0 if it did nothing successfully;
2221 1 if a persistent id was saved.
2222 */
2223 if ((status = save_pers(self, obj, self->pers_func)) != 0)
2224 goto done;
2225 }
2226
2227 type = Py_TYPE(obj);
2228
2229 /* XXX: The old cPickle had an optimization that used switch-case
2230 statement dispatching on the first letter of the type name. It was
2231 probably not a bad idea after all. If benchmarks shows that particular
2232 optimization had some real benefits, it would be nice to add it
2233 back. */
2234
2235 /* Atom types; these aren't memoized, so don't check the memo. */
2236
2237 if (obj == Py_None) {
2238 status = save_none(self, obj);
2239 goto done;
2240 }
2241 else if (obj == Py_False || obj == Py_True) {
2242 status = save_bool(self, obj);
2243 goto done;
2244 }
2245 else if (type == &PyLong_Type) {
2246 status = save_long(self, obj);
2247 goto done;
2248 }
2249 else if (type == &PyFloat_Type) {
2250 status = save_float(self, obj);
2251 goto done;
2252 }
2253
2254 /* Check the memo to see if it has the object. If so, generate
2255 a GET (or BINGET) opcode, instead of pickling the object
2256 once again. */
2257 memo_key = PyLong_FromVoidPtr(obj);
2258 if (memo_key == NULL)
2259 goto error;
2260 if (PyDict_GetItem(self->memo, memo_key)) {
2261 if (memo_get(self, memo_key) < 0)
2262 goto error;
2263 goto done;
2264 }
2265
2266 if (type == &PyBytes_Type) {
2267 status = save_bytes(self, obj);
2268 goto done;
2269 }
2270 else if (type == &PyUnicode_Type) {
2271 status = save_unicode(self, obj);
2272 goto done;
2273 }
2274 else if (type == &PyDict_Type) {
2275 status = save_dict(self, obj);
2276 goto done;
2277 }
2278 else if (type == &PyList_Type) {
2279 status = save_list(self, obj);
2280 goto done;
2281 }
2282 else if (type == &PyTuple_Type) {
2283 status = save_tuple(self, obj);
2284 goto done;
2285 }
2286 else if (type == &PyType_Type) {
2287 status = save_global(self, obj, NULL);
2288 goto done;
2289 }
2290 else if (type == &PyFunction_Type) {
2291 status = save_global(self, obj, NULL);
2292 if (status < 0 && PyErr_ExceptionMatches(PickleError)) {
2293 /* fall back to reduce */
2294 PyErr_Clear();
2295 }
2296 else {
2297 goto done;
2298 }
2299 }
2300 else if (type == &PyCFunction_Type) {
2301 status = save_global(self, obj, NULL);
2302 goto done;
2303 }
2304 else if (PyType_IsSubtype(type, &PyType_Type)) {
2305 status = save_global(self, obj, NULL);
2306 goto done;
2307 }
2308
2309 /* XXX: This part needs some unit tests. */
2310
2311 /* Get a reduction callable, and call it. This may come from
2312 * copyreg.dispatch_table, the object's __reduce_ex__ method,
2313 * or the object's __reduce__ method.
2314 */
2315 reduce_func = PyDict_GetItem(dispatch_table, (PyObject *)type);
2316 if (reduce_func != NULL) {
2317 /* Here, the reference count of the reduce_func object returned by
2318 PyDict_GetItem needs to be increased to be consistent with the one
2319 returned by PyObject_GetAttr. This is allow us to blindly DECREF
2320 reduce_func at the end of the save() routine.
2321 */
2322 Py_INCREF(reduce_func);
2323 Py_INCREF(obj);
2324 reduce_value = pickler_call(self, reduce_func, obj);
2325 }
2326 else {
2327 static PyObject *reduce_str = NULL;
2328 static PyObject *reduce_ex_str = NULL;
2329
2330 /* Cache the name of the reduce methods. */
2331 if (reduce_str == NULL) {
2332 reduce_str = PyUnicode_InternFromString("__reduce__");
2333 if (reduce_str == NULL)
2334 goto error;
2335 reduce_ex_str = PyUnicode_InternFromString("__reduce_ex__");
2336 if (reduce_ex_str == NULL)
2337 goto error;
2338 }
2339
2340 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
2341 automatically defined as __reduce__. While this is convenient, this
2342 make it impossible to know which method was actually called. Of
2343 course, this is not a big deal. But still, it would be nice to let
2344 the user know which method was called when something go
2345 wrong. Incidentally, this means if __reduce_ex__ is not defined, we
2346 don't actually have to check for a __reduce__ method. */
2347
2348 /* Check for a __reduce_ex__ method. */
2349 reduce_func = PyObject_GetAttr(obj, reduce_ex_str);
2350 if (reduce_func != NULL) {
2351 PyObject *proto;
2352 proto = PyLong_FromLong(self->proto);
2353 if (proto != NULL) {
2354 reduce_value = pickler_call(self, reduce_func, proto);
2355 }
2356 }
2357 else {
2358 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2359 PyErr_Clear();
2360 else
2361 goto error;
2362 /* Check for a __reduce__ method. */
2363 reduce_func = PyObject_GetAttr(obj, reduce_str);
2364 if (reduce_func != NULL) {
2365 reduce_value = PyObject_Call(reduce_func, empty_tuple, NULL);
2366 }
2367 else {
2368 PyErr_Format(PicklingError, "can't pickle '%.200s' object: %R",
2369 type->tp_name, obj);
2370 goto error;
2371 }
2372 }
2373 }
2374
2375 if (reduce_value == NULL)
2376 goto error;
2377
2378 if (PyUnicode_Check(reduce_value)) {
2379 status = save_global(self, obj, reduce_value);
2380 goto done;
2381 }
2382
2383 if (!PyTuple_Check(reduce_value)) {
2384 PyErr_SetString(PicklingError,
2385 "__reduce__ must return a string or tuple");
2386 goto error;
2387 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002388
2389 status = save_reduce(self, reduce_value, obj);
2390
2391 if (0) {
2392 error:
2393 status = -1;
2394 }
2395 done:
Alexandre Vassalottidff18342008-07-13 18:48:30 +00002396 Py_LeaveRecursiveCall();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002397 Py_XDECREF(memo_key);
2398 Py_XDECREF(reduce_func);
2399 Py_XDECREF(reduce_value);
2400
2401 return status;
2402}
2403
2404static int
2405dump(PicklerObject *self, PyObject *obj)
2406{
2407 const char stop_op = STOP;
2408
2409 if (self->proto >= 2) {
2410 char header[2];
2411
2412 header[0] = PROTO;
2413 assert(self->proto >= 0 && self->proto < 256);
2414 header[1] = (unsigned char)self->proto;
2415 if (pickler_write(self, header, 2) < 0)
2416 return -1;
2417 }
2418
2419 if (save(self, obj, 0) < 0 ||
2420 pickler_write(self, &stop_op, 1) < 0 ||
2421 pickler_write(self, NULL, 0) < 0)
2422 return -1;
2423
2424 return 0;
2425}
2426
2427PyDoc_STRVAR(Pickler_clear_memo_doc,
2428"clear_memo() -> None. Clears the pickler's \"memo\"."
2429"\n"
2430"The memo is the data structure that remembers which objects the\n"
2431"pickler has already seen, so that shared or recursive objects are\n"
2432"pickled by reference and not by value. This method is useful when\n"
2433"re-using picklers.");
2434
2435static PyObject *
2436Pickler_clear_memo(PicklerObject *self)
2437{
2438 if (self->memo)
2439 PyDict_Clear(self->memo);
2440
2441 Py_RETURN_NONE;
2442}
2443
2444PyDoc_STRVAR(Pickler_dump_doc,
2445"dump(obj) -> None. Write a pickled representation of obj to the open file.");
2446
2447static PyObject *
2448Pickler_dump(PicklerObject *self, PyObject *args)
2449{
2450 PyObject *obj;
2451
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00002452 /* Check whether the Pickler was initialized correctly (issue3664).
2453 Developers often forget to call __init__() in their subclasses, which
2454 would trigger a segfault without this check. */
2455 if (self->write == NULL) {
2456 PyErr_Format(PicklingError,
2457 "Pickler.__init__() was not called by %s.__init__()",
2458 Py_TYPE(self)->tp_name);
2459 return NULL;
2460 }
2461
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002462 if (!PyArg_ParseTuple(args, "O:dump", &obj))
2463 return NULL;
2464
2465 if (dump(self, obj) < 0)
2466 return NULL;
2467
2468 Py_RETURN_NONE;
2469}
2470
/* Method table for Pickler objects (referenced by Pickler_Type.tp_methods):
   dump() takes positional arguments, clear_memo() takes none. */
static struct PyMethodDef Pickler_methods[] = {
    {"dump", (PyCFunction)Pickler_dump, METH_VARARGS,
     Pickler_dump_doc},
    {"clear_memo", (PyCFunction)Pickler_clear_memo, METH_NOARGS,
     Pickler_clear_memo_doc},
    {NULL, NULL}                /* sentinel */
};
2478
2479static void
2480Pickler_dealloc(PicklerObject *self)
2481{
2482 PyObject_GC_UnTrack(self);
2483
2484 Py_XDECREF(self->write);
2485 Py_XDECREF(self->memo);
2486 Py_XDECREF(self->pers_func);
2487 Py_XDECREF(self->arg);
2488 Py_XDECREF(self->fast_memo);
2489
2490 PyMem_Free(self->write_buf);
2491
2492 Py_TYPE(self)->tp_free((PyObject *)self);
2493}
2494
/* GC traversal: report every object reference held by the pickler.
   Py_VISIT relies on the parameters being named `visit` and `arg` and
   returns early if the callback reports a non-zero result. */
static int
Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
{
    Py_VISIT(self->write);
    Py_VISIT(self->memo);
    Py_VISIT(self->pers_func);
    Py_VISIT(self->arg);
    Py_VISIT(self->fast_memo);
    return 0;
}
2505
2506static int
2507Pickler_clear(PicklerObject *self)
2508{
2509 Py_CLEAR(self->write);
2510 Py_CLEAR(self->memo);
2511 Py_CLEAR(self->pers_func);
2512 Py_CLEAR(self->arg);
2513 Py_CLEAR(self->fast_memo);
2514
2515 PyMem_Free(self->write_buf);
2516 self->write_buf = NULL;
2517
2518 return 0;
2519}
2520
/* Docstring of the Pickler type (used as Pickler_Type.tp_doc).  It mirrors
   what Pickler_init() accepts: a file object and an optional protocol. */
PyDoc_STRVAR(Pickler_doc,
"Pickler(file, protocol=None)"
"\n"
"This takes a binary file for writing a pickle data stream.\n"
"\n"
"The optional protocol argument tells the pickler to use the\n"
"given protocol; supported protocols are 0, 1, 2, 3. The default\n"
"protocol is 3; a backward-incompatible protocol designed for\n"
"Python 3.0.\n"
"\n"
"Specifying a negative protocol version selects the highest\n"
"protocol version supported. The higher the protocol used, the\n"
"more recent the version of Python needed to read the pickle\n"
"produced.\n"
"\n"
"The file argument must have a write() method that accepts a single\n"
"bytes argument. It can thus be a file object opened for binary\n"
"writing, a io.BytesIO instance, or any other custom object that\n"
"meets this interface.\n");
2540
2541static int
2542Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
2543{
2544 static char *kwlist[] = {"file", "protocol", 0};
2545 PyObject *file;
2546 PyObject *proto_obj = NULL;
2547 long proto = 0;
2548
2549 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:Pickler",
2550 kwlist, &file, &proto_obj))
2551 return -1;
2552
2553 /* In case of multiple __init__() calls, clear previous content. */
2554 if (self->write != NULL)
2555 (void)Pickler_clear(self);
2556
2557 if (proto_obj == NULL || proto_obj == Py_None)
2558 proto = DEFAULT_PROTOCOL;
2559 else
2560 proto = PyLong_AsLong(proto_obj);
2561
2562 if (proto < 0)
2563 proto = HIGHEST_PROTOCOL;
2564 if (proto > HIGHEST_PROTOCOL) {
2565 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
2566 HIGHEST_PROTOCOL);
2567 return -1;
2568 }
2569
2570 self->proto = proto;
2571 self->bin = proto > 0;
2572 self->arg = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002573 self->fast = 0;
2574 self->fast_nesting = 0;
2575 self->fast_memo = NULL;
2576
2577 if (!PyObject_HasAttrString(file, "write")) {
2578 PyErr_SetString(PyExc_TypeError,
2579 "file must have a 'write' attribute");
2580 return -1;
2581 }
2582 self->write = PyObject_GetAttrString(file, "write");
2583 if (self->write == NULL)
2584 return -1;
2585 self->buf_size = 0;
2586 self->write_buf = (char *)PyMem_Malloc(WRITE_BUF_SIZE);
2587 if (self->write_buf == NULL) {
2588 PyErr_NoMemory();
2589 return -1;
2590 }
2591 self->pers_func = NULL;
2592 if (PyObject_HasAttrString((PyObject *)self, "persistent_id")) {
2593 self->pers_func = PyObject_GetAttrString((PyObject *)self,
2594 "persistent_id");
2595 if (self->pers_func == NULL)
2596 return -1;
2597 }
2598 self->memo = PyDict_New();
2599 if (self->memo == NULL)
2600 return -1;
2601
2602 return 0;
2603}
2604
2605static PyObject *
2606Pickler_get_memo(PicklerObject *self)
2607{
2608 if (self->memo == NULL)
2609 PyErr_SetString(PyExc_AttributeError, "memo");
2610 else
2611 Py_INCREF(self->memo);
2612 return self->memo;
2613}
2614
2615static int
2616Pickler_set_memo(PicklerObject *self, PyObject *value)
2617{
2618 PyObject *tmp;
2619
2620 if (value == NULL) {
2621 PyErr_SetString(PyExc_TypeError,
2622 "attribute deletion is not supported");
2623 return -1;
2624 }
2625 if (!PyDict_Check(value)) {
2626 PyErr_SetString(PyExc_TypeError, "memo must be a dictionary");
2627 return -1;
2628 }
2629
2630 tmp = self->memo;
2631 Py_INCREF(value);
2632 self->memo = value;
2633 Py_XDECREF(tmp);
2634
2635 return 0;
2636}
2637
2638static PyObject *
2639Pickler_get_persid(PicklerObject *self)
2640{
2641 if (self->pers_func == NULL)
2642 PyErr_SetString(PyExc_AttributeError, "persistent_id");
2643 else
2644 Py_INCREF(self->pers_func);
2645 return self->pers_func;
2646}
2647
2648static int
2649Pickler_set_persid(PicklerObject *self, PyObject *value)
2650{
2651 PyObject *tmp;
2652
2653 if (value == NULL) {
2654 PyErr_SetString(PyExc_TypeError,
2655 "attribute deletion is not supported");
2656 return -1;
2657 }
2658 if (!PyCallable_Check(value)) {
2659 PyErr_SetString(PyExc_TypeError,
2660 "persistent_id must be a callable taking one argument");
2661 return -1;
2662 }
2663
2664 tmp = self->pers_func;
2665 Py_INCREF(value);
2666 self->pers_func = value;
2667 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
2668
2669 return 0;
2670}
2671
/* Plain struct members exposed as attributes: the integer fields `bin` and
   `fast` of PicklerObject. */
static PyMemberDef Pickler_members[] = {
    {"bin", T_INT, offsetof(PicklerObject, bin)},
    {"fast", T_INT, offsetof(PicklerObject, fast)},
    {NULL}                      /* sentinel */
};
2677
/* Computed attributes: `memo` and `persistent_id` are routed through
   getter/setter functions that validate the assigned value. */
static PyGetSetDef Pickler_getsets[] = {
    {"memo",          (getter)Pickler_get_memo,
                      (setter)Pickler_set_memo},
    {"persistent_id", (getter)Pickler_get_persid,
                      (setter)Pickler_set_persid},
    {NULL}                      /* sentinel */
};
2685
/* Type object for _pickle.Pickler.  The type can be subclassed
   (Py_TPFLAGS_BASETYPE) and participates in cyclic garbage collection
   (Py_TPFLAGS_HAVE_GC together with the traverse/clear slots). */
static PyTypeObject Pickler_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Pickler"  ,                /*tp_name*/
    sizeof(PicklerObject),              /*tp_basicsize*/
    0,                                  /*tp_itemsize*/
    (destructor)Pickler_dealloc,        /*tp_dealloc*/
    0,                                  /*tp_print*/
    0,                                  /*tp_getattr*/
    0,                                  /*tp_setattr*/
    0,                                  /*tp_reserved*/
    0,                                  /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    0,                                  /*tp_as_mapping*/
    0,                                  /*tp_hash*/
    0,                                  /*tp_call*/
    0,                                  /*tp_str*/
    0,                                  /*tp_getattro*/
    0,                                  /*tp_setattro*/
    0,                                  /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
    Pickler_doc,                        /*tp_doc*/
    (traverseproc)Pickler_traverse,     /*tp_traverse*/
    (inquiry)Pickler_clear,             /*tp_clear*/
    0,                                  /*tp_richcompare*/
    0,                                  /*tp_weaklistoffset*/
    0,                                  /*tp_iter*/
    0,                                  /*tp_iternext*/
    Pickler_methods,                    /*tp_methods*/
    Pickler_members,                    /*tp_members*/
    Pickler_getsets,                    /*tp_getset*/
    0,                                  /*tp_base*/
    0,                                  /*tp_dict*/
    0,                                  /*tp_descr_get*/
    0,                                  /*tp_descr_set*/
    0,                                  /*tp_dictoffset*/
    (initproc)Pickler_init,             /*tp_init*/
    PyType_GenericAlloc,                /*tp_alloc*/
    PyType_GenericNew,                  /*tp_new*/
    PyObject_GC_Del,                    /*tp_free*/
    0,                                  /*tp_is_gc*/
};
2728
2729/* Temporary helper for calling self.find_class().
2730
2731 XXX: It would be nice to able to avoid Python function call overhead, by
2732 using directly the C version of find_class(), when find_class() is not
2733 overridden by a subclass. Although, this could become rather hackish. A
2734 simpler optimization would be to call the C function when self is not a
2735 subclass instance. */
2736static PyObject *
2737find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
2738{
2739 return PyObject_CallMethod((PyObject *)self, "find_class", "OO",
2740 module_name, global_name);
2741}
2742
2743static int
2744marker(UnpicklerObject *self)
2745{
2746 if (self->num_marks < 1) {
2747 PyErr_SetString(UnpicklingError, "could not find MARK");
2748 return -1;
2749 }
2750
2751 return self->marks[--self->num_marks];
2752}
2753
/* Push None onto the unpickler stack.  Returns 0 on success; the -1 passed
   to PDATA_APPEND is the value that macro returns on failure. */
static int
load_none(UnpicklerObject *self)
{
    PDATA_APPEND(self->stack, Py_None, -1);
    return 0;
}
2760
2761static int
2762bad_readline(void)
2763{
2764 PyErr_SetString(UnpicklingError, "pickle data was truncated");
2765 return -1;
2766}
2767
2768static int
2769load_int(UnpicklerObject *self)
2770{
2771 PyObject *value;
2772 char *endptr, *s;
2773 Py_ssize_t len;
2774 long x;
2775
2776 if ((len = unpickler_readline(self, &s)) < 0)
2777 return -1;
2778 if (len < 2)
2779 return bad_readline();
2780
2781 errno = 0;
2782 /* XXX: Should the base argument of strtol() be explicitly set to 10? */
2783 x = strtol(s, &endptr, 0);
2784
2785 if (errno || (*endptr != '\n') || (endptr[1] != '\0')) {
2786 /* Hm, maybe we've got something long. Let's try reading
2787 * it as a Python long object. */
2788 errno = 0;
2789 /* XXX: Same thing about the base here. */
2790 value = PyLong_FromString(s, NULL, 0);
2791 if (value == NULL) {
2792 PyErr_SetString(PyExc_ValueError,
2793 "could not convert string to int");
2794 return -1;
2795 }
2796 }
2797 else {
2798 if (len == 3 && (x == 0 || x == 1)) {
2799 if ((value = PyBool_FromLong(x)) == NULL)
2800 return -1;
2801 }
2802 else {
2803 if ((value = PyLong_FromLong(x)) == NULL)
2804 return -1;
2805 }
2806 }
2807
2808 PDATA_PUSH(self->stack, value, -1);
2809 return 0;
2810}
2811
/* Push the given boolean singleton onto the unpickler stack.  The caller
   must pass Py_True or Py_False (checked by the assert in debug builds).
   Returns 0 on success; PDATA_APPEND returns -1 on failure. */
static int
load_bool(UnpicklerObject *self, PyObject *boolean)
{
    assert(boolean == Py_True || boolean == Py_False);
    PDATA_APPEND(self->stack, boolean, -1);
    return 0;
}
2819
/* bytes contains size bytes of a little-endian integer.  Return its value
 * as a C long.  Obscure:  when size is 1 or 2, this is an unsigned
 * little-endian int, but when size is 4 it's a signed one.  This is a
 * historical source of x-platform bugs.
 *
 * The value is assembled in an unsigned long so that moving the top byte
 * into bit 31 never left-shifts a signed value into its sign bit, which is
 * undefined behaviour when long is 32 bits wide.
 */
static long
calc_binint(char *bytes, int size)
{
    unsigned char *s = (unsigned char *)bytes;
    unsigned long x = 0;
    int i;

    for (i = 0; i < size; i++) {
        x |= (unsigned long)s[i] << (i * 8);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4)
     * is signed, so on a box with longs bigger than 4 bytes we need
     * to extend a BININT's sign bit to the full width.
     */
    if (sizeof(long) > 4 && size == 4 && (x & (1UL << 31))) {
        x |= ~(unsigned long)0xFFFFFFFFUL;
    }

    return (long)x;
}
2846
2847static int
2848load_binintx(UnpicklerObject *self, char *s, int size)
2849{
2850 PyObject *value;
2851 long x;
2852
2853 x = calc_binint(s, size);
2854
2855 if ((value = PyLong_FromLong(x)) == NULL)
2856 return -1;
2857
2858 PDATA_PUSH(self->stack, value, -1);
2859 return 0;
2860}
2861
2862static int
2863load_binint(UnpicklerObject *self)
2864{
2865 char *s;
2866
2867 if (unpickler_read(self, &s, 4) < 0)
2868 return -1;
2869
2870 return load_binintx(self, s, 4);
2871}
2872
2873static int
2874load_binint1(UnpicklerObject *self)
2875{
2876 char *s;
2877
2878 if (unpickler_read(self, &s, 1) < 0)
2879 return -1;
2880
2881 return load_binintx(self, s, 1);
2882}
2883
2884static int
2885load_binint2(UnpicklerObject *self)
2886{
2887 char *s;
2888
2889 if (unpickler_read(self, &s, 2) < 0)
2890 return -1;
2891
2892 return load_binintx(self, s, 2);
2893}
2894
2895static int
2896load_long(UnpicklerObject *self)
2897{
2898 PyObject *value;
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00002899 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002900 Py_ssize_t len;
2901
2902 if ((len = unpickler_readline(self, &s)) < 0)
2903 return -1;
2904 if (len < 2)
2905 return bad_readline();
2906
Mark Dickinson8dd05142009-01-20 20:43:58 +00002907 /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
2908 the 'L' before calling PyLong_FromString. In order to maintain
2909 compatibility with Python 3.0.0, we don't actually *require*
2910 the 'L' to be present. */
2911 if (s[len-2] == 'L') {
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00002912 s[len-2] = '\0';
Mark Dickinson8dd05142009-01-20 20:43:58 +00002913 }
Alexandre Vassalottie4bccb72009-01-24 01:47:57 +00002914 /* XXX: Should the base argument explicitly set to 10? */
2915 value = PyLong_FromString(s, NULL, 0);
Mark Dickinson8dd05142009-01-20 20:43:58 +00002916 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002917 return -1;
2918
2919 PDATA_PUSH(self->stack, value, -1);
2920 return 0;
2921}
2922
2923/* 'size' bytes contain the # of bytes of little-endian 256's-complement
2924 * data following.
2925 */
2926static int
2927load_counted_long(UnpicklerObject *self, int size)
2928{
2929 PyObject *value;
2930 char *nbytes;
2931 char *pdata;
2932
2933 assert(size == 1 || size == 4);
2934 if (unpickler_read(self, &nbytes, size) < 0)
2935 return -1;
2936
2937 size = calc_binint(nbytes, size);
2938 if (size < 0) {
2939 /* Corrupt or hostile pickle -- we never write one like this */
2940 PyErr_SetString(UnpicklingError,
2941 "LONG pickle has negative byte count");
2942 return -1;
2943 }
2944
2945 if (size == 0)
2946 value = PyLong_FromLong(0L);
2947 else {
2948 /* Read the raw little-endian bytes and convert. */
2949 if (unpickler_read(self, &pdata, size) < 0)
2950 return -1;
2951 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
2952 1 /* little endian */ , 1 /* signed */ );
2953 }
2954 if (value == NULL)
2955 return -1;
2956 PDATA_PUSH(self->stack, value, -1);
2957 return 0;
2958}
2959
2960static int
2961load_float(UnpicklerObject *self)
2962{
2963 PyObject *value;
2964 char *endptr, *s;
2965 Py_ssize_t len;
2966 double d;
2967
2968 if ((len = unpickler_readline(self, &s)) < 0)
2969 return -1;
2970 if (len < 2)
2971 return bad_readline();
2972
2973 errno = 0;
2974 d = PyOS_ascii_strtod(s, &endptr);
2975
Mark Dickinsoncddcf442009-01-24 21:46:33 +00002976 if ((errno == ERANGE && !(fabs(d) <= 1.0)) ||
2977 (endptr[0] != '\n') || (endptr[1] != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002978 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
2979 return -1;
2980 }
2981
2982 if ((value = PyFloat_FromDouble(d)) == NULL)
2983 return -1;
2984
2985 PDATA_PUSH(self->stack, value, -1);
2986 return 0;
2987}
2988
2989static int
2990load_binfloat(UnpicklerObject *self)
2991{
2992 PyObject *value;
2993 double x;
2994 char *s;
2995
2996 if (unpickler_read(self, &s, 8) < 0)
2997 return -1;
2998
2999 x = _PyFloat_Unpack8((unsigned char *)s, 0);
3000 if (x == -1.0 && PyErr_Occurred())
3001 return -1;
3002
3003 if ((value = PyFloat_FromDouble(x)) == NULL)
3004 return -1;
3005
3006 PDATA_PUSH(self->stack, value, -1);
3007 return 0;
3008}
3009
3010static int
3011load_string(UnpicklerObject *self)
3012{
3013 PyObject *bytes;
3014 PyObject *str = NULL;
3015 Py_ssize_t len;
3016 char *s, *p;
3017
3018 if ((len = unpickler_readline(self, &s)) < 0)
3019 return -1;
3020 if (len < 3)
3021 return bad_readline();
3022 if ((s = strdup(s)) == NULL) {
3023 PyErr_NoMemory();
3024 return -1;
3025 }
3026
3027 /* Strip outermost quotes */
3028 while (s[len - 1] <= ' ')
3029 len--;
3030 if (s[0] == '"' && s[len - 1] == '"') {
3031 s[len - 1] = '\0';
3032 p = s + 1;
3033 len -= 2;
3034 }
3035 else if (s[0] == '\'' && s[len - 1] == '\'') {
3036 s[len - 1] = '\0';
3037 p = s + 1;
3038 len -= 2;
3039 }
3040 else {
3041 free(s);
3042 PyErr_SetString(PyExc_ValueError, "insecure string pickle");
3043 return -1;
3044 }
3045
3046 /* Use the PyBytes API to decode the string, since that is what is used
3047 to encode, and then coerce the result to Unicode. */
3048 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
3049 free(s);
3050 if (bytes == NULL)
3051 return -1;
3052 str = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
3053 Py_DECREF(bytes);
3054 if (str == NULL)
3055 return -1;
3056
3057 PDATA_PUSH(self->stack, str, -1);
3058 return 0;
3059}
3060
3061static int
3062load_binbytes(UnpicklerObject *self)
3063{
3064 PyObject *bytes;
3065 long x;
3066 char *s;
3067
3068 if (unpickler_read(self, &s, 4) < 0)
3069 return -1;
3070
3071 x = calc_binint(s, 4);
3072 if (x < 0) {
3073 PyErr_SetString(UnpicklingError,
3074 "BINBYTES pickle has negative byte count");
3075 return -1;
3076 }
3077
3078 if (unpickler_read(self, &s, x) < 0)
3079 return -1;
3080 bytes = PyBytes_FromStringAndSize(s, x);
3081 if (bytes == NULL)
3082 return -1;
3083
3084 PDATA_PUSH(self->stack, bytes, -1);
3085 return 0;
3086}
3087
3088static int
3089load_short_binbytes(UnpicklerObject *self)
3090{
3091 PyObject *bytes;
3092 unsigned char x;
3093 char *s;
3094
3095 if (unpickler_read(self, &s, 1) < 0)
3096 return -1;
3097
3098 x = (unsigned char)s[0];
3099
3100 if (unpickler_read(self, &s, x) < 0)
3101 return -1;
3102
3103 bytes = PyBytes_FromStringAndSize(s, x);
3104 if (bytes == NULL)
3105 return -1;
3106
3107 PDATA_PUSH(self->stack, bytes, -1);
3108 return 0;
3109}
3110
3111static int
3112load_binstring(UnpicklerObject *self)
3113{
3114 PyObject *str;
3115 long x;
3116 char *s;
3117
3118 if (unpickler_read(self, &s, 4) < 0)
3119 return -1;
3120
3121 x = calc_binint(s, 4);
3122 if (x < 0) {
3123 PyErr_SetString(UnpicklingError,
3124 "BINSTRING pickle has negative byte count");
3125 return -1;
3126 }
3127
3128 if (unpickler_read(self, &s, x) < 0)
3129 return -1;
3130
3131 /* Convert Python 2.x strings to unicode. */
3132 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
3133 if (str == NULL)
3134 return -1;
3135
3136 PDATA_PUSH(self->stack, str, -1);
3137 return 0;
3138}
3139
3140static int
3141load_short_binstring(UnpicklerObject *self)
3142{
3143 PyObject *str;
3144 unsigned char x;
3145 char *s;
3146
3147 if (unpickler_read(self, &s, 1) < 0)
3148 return -1;
3149
3150 x = (unsigned char)s[0];
3151
3152 if (unpickler_read(self, &s, x) < 0)
3153 return -1;
3154
3155 /* Convert Python 2.x strings to unicode. */
3156 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
3157 if (str == NULL)
3158 return -1;
3159
3160 PDATA_PUSH(self->stack, str, -1);
3161 return 0;
3162}
3163
3164static int
3165load_unicode(UnpicklerObject *self)
3166{
3167 PyObject *str;
3168 Py_ssize_t len;
3169 char *s;
3170
3171 if ((len = unpickler_readline(self, &s)) < 0)
3172 return -1;
3173 if (len < 1)
3174 return bad_readline();
3175
3176 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
3177 if (str == NULL)
3178 return -1;
3179
3180 PDATA_PUSH(self->stack, str, -1);
3181 return 0;
3182}
3183
3184static int
3185load_binunicode(UnpicklerObject *self)
3186{
3187 PyObject *str;
3188 long size;
3189 char *s;
3190
3191 if (unpickler_read(self, &s, 4) < 0)
3192 return -1;
3193
3194 size = calc_binint(s, 4);
3195 if (size < 0) {
3196 PyErr_SetString(UnpicklingError,
3197 "BINUNICODE pickle has negative byte count");
3198 return -1;
3199 }
3200
3201 if (unpickler_read(self, &s, size) < 0)
3202 return -1;
3203
3204 str = PyUnicode_DecodeUTF8(s, size, NULL);
3205 if (str == NULL)
3206 return -1;
3207
3208 PDATA_PUSH(self->stack, str, -1);
3209 return 0;
3210}
3211
3212static int
3213load_tuple(UnpicklerObject *self)
3214{
3215 PyObject *tuple;
3216 int i;
3217
3218 if ((i = marker(self)) < 0)
3219 return -1;
3220
3221 tuple = Pdata_poptuple(self->stack, i);
3222 if (tuple == NULL)
3223 return -1;
3224 PDATA_PUSH(self->stack, tuple, -1);
3225 return 0;
3226}
3227
3228static int
3229load_counted_tuple(UnpicklerObject *self, int len)
3230{
3231 PyObject *tuple;
3232
3233 tuple = PyTuple_New(len);
3234 if (tuple == NULL)
3235 return -1;
3236
3237 while (--len >= 0) {
3238 PyObject *item;
3239
3240 PDATA_POP(self->stack, item);
3241 if (item == NULL)
3242 return -1;
3243 PyTuple_SET_ITEM(tuple, len, item);
3244 }
3245 PDATA_PUSH(self->stack, tuple, -1);
3246 return 0;
3247}
3248
3249static int
3250load_empty_list(UnpicklerObject *self)
3251{
3252 PyObject *list;
3253
3254 if ((list = PyList_New(0)) == NULL)
3255 return -1;
3256 PDATA_PUSH(self->stack, list, -1);
3257 return 0;
3258}
3259
3260static int
3261load_empty_dict(UnpicklerObject *self)
3262{
3263 PyObject *dict;
3264
3265 if ((dict = PyDict_New()) == NULL)
3266 return -1;
3267 PDATA_PUSH(self->stack, dict, -1);
3268 return 0;
3269}
3270
3271static int
3272load_list(UnpicklerObject *self)
3273{
3274 PyObject *list;
3275 int i;
3276
3277 if ((i = marker(self)) < 0)
3278 return -1;
3279
3280 list = Pdata_poplist(self->stack, i);
3281 if (list == NULL)
3282 return -1;
3283 PDATA_PUSH(self->stack, list, -1);
3284 return 0;
3285}
3286
3287static int
3288load_dict(UnpicklerObject *self)
3289{
3290 PyObject *dict, *key, *value;
3291 int i, j, k;
3292
3293 if ((i = marker(self)) < 0)
3294 return -1;
3295 j = self->stack->length;
3296
3297 if ((dict = PyDict_New()) == NULL)
3298 return -1;
3299
3300 for (k = i + 1; k < j; k += 2) {
3301 key = self->stack->data[k - 1];
3302 value = self->stack->data[k];
3303 if (PyDict_SetItem(dict, key, value) < 0) {
3304 Py_DECREF(dict);
3305 return -1;
3306 }
3307 }
3308 Pdata_clear(self->stack, i);
3309 PDATA_PUSH(self->stack, dict, -1);
3310 return 0;
3311}
3312
3313static PyObject *
3314instantiate(PyObject *cls, PyObject *args)
3315{
3316 PyObject *r = NULL;
3317
3318 /* XXX: The pickle.py module does not create instances this way when the
3319 args tuple is empty. See Unpickler._instantiate(). */
3320 if ((r = PyObject_CallObject(cls, args)))
3321 return r;
3322
3323 /* XXX: Is this still nescessary? */
3324 {
3325 PyObject *tp, *v, *tb, *tmp_value;
3326
3327 PyErr_Fetch(&tp, &v, &tb);
3328 tmp_value = v;
3329 /* NULL occurs when there was a KeyboardInterrupt */
3330 if (tmp_value == NULL)
3331 tmp_value = Py_None;
3332 if ((r = PyTuple_Pack(3, tmp_value, cls, args))) {
3333 Py_XDECREF(v);
3334 v = r;
3335 }
3336 PyErr_Restore(tp, v, tb);
3337 }
3338 return NULL;
3339}
3340
3341static int
3342load_obj(UnpicklerObject *self)
3343{
3344 PyObject *cls, *args, *obj = NULL;
3345 int i;
3346
3347 if ((i = marker(self)) < 0)
3348 return -1;
3349
3350 args = Pdata_poptuple(self->stack, i + 1);
3351 if (args == NULL)
3352 return -1;
3353
3354 PDATA_POP(self->stack, cls);
3355 if (cls) {
3356 obj = instantiate(cls, args);
3357 Py_DECREF(cls);
3358 }
3359 Py_DECREF(args);
3360 if (obj == NULL)
3361 return -1;
3362
3363 PDATA_PUSH(self->stack, obj, -1);
3364 return 0;
3365}
3366
3367static int
3368load_inst(UnpicklerObject *self)
3369{
3370 PyObject *cls = NULL;
3371 PyObject *args = NULL;
3372 PyObject *obj = NULL;
3373 PyObject *module_name;
3374 PyObject *class_name;
3375 Py_ssize_t len;
3376 int i;
3377 char *s;
3378
3379 if ((i = marker(self)) < 0)
3380 return -1;
3381 if ((len = unpickler_readline(self, &s)) < 0)
3382 return -1;
3383 if (len < 2)
3384 return bad_readline();
3385
3386 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
3387 identifiers are permitted in Python 3.0, since the INST opcode is only
3388 supported by older protocols on Python 2.x. */
3389 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
3390 if (module_name == NULL)
3391 return -1;
3392
3393 if ((len = unpickler_readline(self, &s)) >= 0) {
3394 if (len < 2)
3395 return bad_readline();
3396 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
3397 if (class_name == NULL) {
3398 cls = find_class(self, module_name, class_name);
3399 Py_DECREF(class_name);
3400 }
3401 }
3402 Py_DECREF(module_name);
3403
3404 if (cls == NULL)
3405 return -1;
3406
3407 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
3408 obj = instantiate(cls, args);
3409 Py_DECREF(args);
3410 }
3411 Py_DECREF(cls);
3412
3413 if (obj == NULL)
3414 return -1;
3415
3416 PDATA_PUSH(self->stack, obj, -1);
3417 return 0;
3418}
3419
3420static int
3421load_newobj(UnpicklerObject *self)
3422{
3423 PyObject *args = NULL;
3424 PyObject *clsraw = NULL;
3425 PyTypeObject *cls; /* clsraw cast to its true type */
3426 PyObject *obj;
3427
3428 /* Stack is ... cls argtuple, and we want to call
3429 * cls.__new__(cls, *argtuple).
3430 */
3431 PDATA_POP(self->stack, args);
3432 if (args == NULL)
3433 goto error;
3434 if (!PyTuple_Check(args)) {
3435 PyErr_SetString(UnpicklingError, "NEWOBJ expected an arg " "tuple.");
3436 goto error;
3437 }
3438
3439 PDATA_POP(self->stack, clsraw);
3440 cls = (PyTypeObject *)clsraw;
3441 if (cls == NULL)
3442 goto error;
3443 if (!PyType_Check(cls)) {
3444 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
3445 "isn't a type object");
3446 goto error;
3447 }
3448 if (cls->tp_new == NULL) {
3449 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
3450 "has NULL tp_new");
3451 goto error;
3452 }
3453
3454 /* Call __new__. */
3455 obj = cls->tp_new(cls, args, NULL);
3456 if (obj == NULL)
3457 goto error;
3458
3459 Py_DECREF(args);
3460 Py_DECREF(clsraw);
3461 PDATA_PUSH(self->stack, obj, -1);
3462 return 0;
3463
3464 error:
3465 Py_XDECREF(args);
3466 Py_XDECREF(clsraw);
3467 return -1;
3468}
3469
3470static int
3471load_global(UnpicklerObject *self)
3472{
3473 PyObject *global = NULL;
3474 PyObject *module_name;
3475 PyObject *global_name;
3476 Py_ssize_t len;
3477 char *s;
3478
3479 if ((len = unpickler_readline(self, &s)) < 0)
3480 return -1;
3481 if (len < 2)
3482 return bad_readline();
3483 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
3484 if (!module_name)
3485 return -1;
3486
3487 if ((len = unpickler_readline(self, &s)) >= 0) {
3488 if (len < 2) {
3489 Py_DECREF(module_name);
3490 return bad_readline();
3491 }
3492 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
3493 if (global_name) {
3494 global = find_class(self, module_name, global_name);
3495 Py_DECREF(global_name);
3496 }
3497 }
3498 Py_DECREF(module_name);
3499
3500 if (global == NULL)
3501 return -1;
3502 PDATA_PUSH(self->stack, global, -1);
3503 return 0;
3504}
3505
3506static int
3507load_persid(UnpicklerObject *self)
3508{
3509 PyObject *pid;
3510 Py_ssize_t len;
3511 char *s;
3512
3513 if (self->pers_func) {
3514 if ((len = unpickler_readline(self, &s)) < 0)
3515 return -1;
3516 if (len < 2)
3517 return bad_readline();
3518
3519 pid = PyBytes_FromStringAndSize(s, len - 1);
3520 if (pid == NULL)
3521 return -1;
3522
3523 /* Ugh... this does not leak since unpickler_call() steals the
3524 reference to pid first. */
3525 pid = unpickler_call(self, self->pers_func, pid);
3526 if (pid == NULL)
3527 return -1;
3528
3529 PDATA_PUSH(self->stack, pid, -1);
3530 return 0;
3531 }
3532 else {
3533 PyErr_SetString(UnpicklingError,
3534 "A load persistent id instruction was encountered,\n"
3535 "but no persistent_load function was specified.");
3536 return -1;
3537 }
3538}
3539
3540static int
3541load_binpersid(UnpicklerObject *self)
3542{
3543 PyObject *pid;
3544
3545 if (self->pers_func) {
3546 PDATA_POP(self->stack, pid);
3547 if (pid == NULL)
3548 return -1;
3549
3550 /* Ugh... this does not leak since unpickler_call() steals the
3551 reference to pid first. */
3552 pid = unpickler_call(self, self->pers_func, pid);
3553 if (pid == NULL)
3554 return -1;
3555
3556 PDATA_PUSH(self->stack, pid, -1);
3557 return 0;
3558 }
3559 else {
3560 PyErr_SetString(UnpicklingError,
3561 "A load persistent id instruction was encountered,\n"
3562 "but no persistent_load function was specified.");
3563 return -1;
3564 }
3565}
3566
3567static int
3568load_pop(UnpicklerObject *self)
3569{
3570 int len;
3571
3572 if ((len = self->stack->length) <= 0)
3573 return stack_underflow();
3574
3575 /* Note that we split the (pickle.py) stack into two stacks,
3576 * an object stack and a mark stack. We have to be clever and
3577 * pop the right one. We do this by looking at the top of the
3578 * mark stack.
3579 */
3580
3581 if ((self->num_marks > 0) && (self->marks[self->num_marks - 1] == len))
3582 self->num_marks--;
3583 else {
3584 len--;
3585 Py_DECREF(self->stack->data[len]);
3586 self->stack->length = len;
3587 }
3588
3589 return 0;
3590}
3591
3592static int
3593load_pop_mark(UnpicklerObject *self)
3594{
3595 int i;
3596
3597 if ((i = marker(self)) < 0)
3598 return -1;
3599
3600 Pdata_clear(self->stack, i);
3601
3602 return 0;
3603}
3604
3605static int
3606load_dup(UnpicklerObject *self)
3607{
3608 PyObject *last;
3609 int len;
3610
3611 if ((len = self->stack->length) <= 0)
3612 return stack_underflow();
3613 last = self->stack->data[len - 1];
3614 PDATA_APPEND(self->stack, last, -1);
3615 return 0;
3616}
3617
3618static int
3619load_get(UnpicklerObject *self)
3620{
3621 PyObject *key, *value;
3622 Py_ssize_t len;
3623 char *s;
3624
3625 if ((len = unpickler_readline(self, &s)) < 0)
3626 return -1;
3627 if (len < 2)
3628 return bad_readline();
3629
3630 key = PyLong_FromString(s, NULL, 10);
3631 if (key == NULL)
3632 return -1;
3633
3634 value = PyDict_GetItemWithError(self->memo, key);
3635 if (value == NULL) {
3636 if (!PyErr_Occurred())
3637 PyErr_SetObject(PyExc_KeyError, key);
3638 Py_DECREF(key);
3639 return -1;
3640 }
3641 Py_DECREF(key);
3642
3643 PDATA_APPEND(self->stack, value, -1);
3644 return 0;
3645}
3646
3647static int
3648load_binget(UnpicklerObject *self)
3649{
3650 PyObject *key, *value;
3651 char *s;
3652
3653 if (unpickler_read(self, &s, 1) < 0)
3654 return -1;
3655
3656 /* Here, the unsigned cast is necessary to avoid negative values. */
3657 key = PyLong_FromLong((long)(unsigned char)s[0]);
3658 if (key == NULL)
3659 return -1;
3660
3661 value = PyDict_GetItemWithError(self->memo, key);
3662 if (value == NULL) {
3663 if (!PyErr_Occurred())
3664 PyErr_SetObject(PyExc_KeyError, key);
3665 Py_DECREF(key);
3666 return -1;
3667 }
3668 Py_DECREF(key);
3669
3670 PDATA_APPEND(self->stack, value, -1);
3671 return 0;
3672}
3673
3674static int
3675load_long_binget(UnpicklerObject *self)
3676{
3677 PyObject *key, *value;
3678 char *s;
3679 long k;
3680
3681 if (unpickler_read(self, &s, 4) < 0)
3682 return -1;
3683
3684 k = (long)(unsigned char)s[0];
3685 k |= (long)(unsigned char)s[1] << 8;
3686 k |= (long)(unsigned char)s[2] << 16;
3687 k |= (long)(unsigned char)s[3] << 24;
3688
3689 key = PyLong_FromLong(k);
3690 if (key == NULL)
3691 return -1;
3692
3693 value = PyDict_GetItemWithError(self->memo, key);
3694 if (value == NULL) {
3695 if (!PyErr_Occurred())
3696 PyErr_SetObject(PyExc_KeyError, key);
3697 Py_DECREF(key);
3698 return -1;
3699 }
3700 Py_DECREF(key);
3701
3702 PDATA_APPEND(self->stack, value, -1);
3703 return 0;
3704}
3705
3706/* Push an object from the extension registry (EXT[124]). nbytes is
3707 * the number of bytes following the opcode, holding the index (code) value.
3708 */
3709static int
3710load_extension(UnpicklerObject *self, int nbytes)
3711{
3712 char *codebytes; /* the nbytes bytes after the opcode */
3713 long code; /* calc_binint returns long */
3714 PyObject *py_code; /* code as a Python int */
3715 PyObject *obj; /* the object to push */
3716 PyObject *pair; /* (module_name, class_name) */
3717 PyObject *module_name, *class_name;
3718
3719 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
3720 if (unpickler_read(self, &codebytes, nbytes) < 0)
3721 return -1;
3722 code = calc_binint(codebytes, nbytes);
3723 if (code <= 0) { /* note that 0 is forbidden */
3724 /* Corrupt or hostile pickle. */
3725 PyErr_SetString(UnpicklingError, "EXT specifies code <= 0");
3726 return -1;
3727 }
3728
3729 /* Look for the code in the cache. */
3730 py_code = PyLong_FromLong(code);
3731 if (py_code == NULL)
3732 return -1;
3733 obj = PyDict_GetItem(extension_cache, py_code);
3734 if (obj != NULL) {
3735 /* Bingo. */
3736 Py_DECREF(py_code);
3737 PDATA_APPEND(self->stack, obj, -1);
3738 return 0;
3739 }
3740
3741 /* Look up the (module_name, class_name) pair. */
3742 pair = PyDict_GetItem(inverted_registry, py_code);
3743 if (pair == NULL) {
3744 Py_DECREF(py_code);
3745 PyErr_Format(PyExc_ValueError, "unregistered extension "
3746 "code %ld", code);
3747 return -1;
3748 }
3749 /* Since the extension registry is manipulable via Python code,
3750 * confirm that pair is really a 2-tuple of strings.
3751 */
3752 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2 ||
3753 !PyUnicode_Check(module_name = PyTuple_GET_ITEM(pair, 0)) ||
3754 !PyUnicode_Check(class_name = PyTuple_GET_ITEM(pair, 1))) {
3755 Py_DECREF(py_code);
3756 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
3757 "isn't a 2-tuple of strings", code);
3758 return -1;
3759 }
3760 /* Load the object. */
3761 obj = find_class(self, module_name, class_name);
3762 if (obj == NULL) {
3763 Py_DECREF(py_code);
3764 return -1;
3765 }
3766 /* Cache code -> obj. */
3767 code = PyDict_SetItem(extension_cache, py_code, obj);
3768 Py_DECREF(py_code);
3769 if (code < 0) {
3770 Py_DECREF(obj);
3771 return -1;
3772 }
3773 PDATA_PUSH(self->stack, obj, -1);
3774 return 0;
3775}
3776
3777static int
3778load_put(UnpicklerObject *self)
3779{
3780 PyObject *key, *value;
3781 Py_ssize_t len;
3782 char *s;
3783 int x;
3784
3785 if ((len = unpickler_readline(self, &s)) < 0)
3786 return -1;
3787 if (len < 2)
3788 return bad_readline();
3789 if ((x = self->stack->length) <= 0)
3790 return stack_underflow();
3791
3792 key = PyLong_FromString(s, NULL, 10);
3793 if (key == NULL)
3794 return -1;
3795 value = self->stack->data[x - 1];
3796
3797 x = PyDict_SetItem(self->memo, key, value);
3798 Py_DECREF(key);
3799 return x;
3800}
3801
3802static int
3803load_binput(UnpicklerObject *self)
3804{
3805 PyObject *key, *value;
3806 char *s;
3807 int x;
3808
3809 if (unpickler_read(self, &s, 1) < 0)
3810 return -1;
3811 if ((x = self->stack->length) <= 0)
3812 return stack_underflow();
3813
3814 key = PyLong_FromLong((long)(unsigned char)s[0]);
3815 if (key == NULL)
3816 return -1;
3817 value = self->stack->data[x - 1];
3818
3819 x = PyDict_SetItem(self->memo, key, value);
3820 Py_DECREF(key);
3821 return x;
3822}
3823
3824static int
3825load_long_binput(UnpicklerObject *self)
3826{
3827 PyObject *key, *value;
3828 long k;
3829 char *s;
3830 int x;
3831
3832 if (unpickler_read(self, &s, 4) < 0)
3833 return -1;
3834 if ((x = self->stack->length) <= 0)
3835 return stack_underflow();
3836
3837 k = (long)(unsigned char)s[0];
3838 k |= (long)(unsigned char)s[1] << 8;
3839 k |= (long)(unsigned char)s[2] << 16;
3840 k |= (long)(unsigned char)s[3] << 24;
3841
3842 key = PyLong_FromLong(k);
3843 if (key == NULL)
3844 return -1;
3845 value = self->stack->data[x - 1];
3846
3847 x = PyDict_SetItem(self->memo, key, value);
3848 Py_DECREF(key);
3849 return x;
3850}
3851
3852static int
3853do_append(UnpicklerObject *self, int x)
3854{
3855 PyObject *value;
3856 PyObject *list;
3857 int len, i;
3858
3859 len = self->stack->length;
3860 if (x > len || x <= 0)
3861 return stack_underflow();
3862 if (len == x) /* nothing to do */
3863 return 0;
3864
3865 list = self->stack->data[x - 1];
3866
3867 if (PyList_Check(list)) {
3868 PyObject *slice;
3869 Py_ssize_t list_len;
3870
3871 slice = Pdata_poplist(self->stack, x);
3872 if (!slice)
3873 return -1;
3874 list_len = PyList_GET_SIZE(list);
3875 i = PyList_SetSlice(list, list_len, list_len, slice);
3876 Py_DECREF(slice);
3877 return i;
3878 }
3879 else {
3880 PyObject *append_func;
3881
3882 append_func = PyObject_GetAttrString(list, "append");
3883 if (append_func == NULL)
3884 return -1;
3885 for (i = x; i < len; i++) {
3886 PyObject *result;
3887
3888 value = self->stack->data[i];
3889 result = unpickler_call(self, append_func, value);
3890 if (result == NULL) {
3891 Pdata_clear(self->stack, i + 1);
3892 self->stack->length = x;
3893 return -1;
3894 }
3895 Py_DECREF(result);
3896 }
3897 self->stack->length = x;
3898 }
3899
3900 return 0;
3901}
3902
3903static int
3904load_append(UnpicklerObject *self)
3905{
3906 return do_append(self, self->stack->length - 1);
3907}
3908
3909static int
3910load_appends(UnpicklerObject *self)
3911{
3912 return do_append(self, marker(self));
3913}
3914
3915static int
3916do_setitems(UnpicklerObject *self, int x)
3917{
3918 PyObject *value, *key;
3919 PyObject *dict;
3920 int len, i;
3921 int status = 0;
3922
3923 len = self->stack->length;
3924 if (x > len || x <= 0)
3925 return stack_underflow();
3926 if (len == x) /* nothing to do */
3927 return 0;
3928 if ((len - x) % 2 != 0) {
3929 /* Currupt or hostile pickle -- we never write one like this. */
3930 PyErr_SetString(UnpicklingError, "odd number of items for SETITEMS");
3931 return -1;
3932 }
3933
3934 /* Here, dict does not actually need to be a PyDict; it could be anything
3935 that supports the __setitem__ attribute. */
3936 dict = self->stack->data[x - 1];
3937
3938 for (i = x + 1; i < len; i += 2) {
3939 key = self->stack->data[i - 1];
3940 value = self->stack->data[i];
3941 if (PyObject_SetItem(dict, key, value) < 0) {
3942 status = -1;
3943 break;
3944 }
3945 }
3946
3947 Pdata_clear(self->stack, x);
3948 return status;
3949}
3950
3951static int
3952load_setitem(UnpicklerObject *self)
3953{
3954 return do_setitems(self, self->stack->length - 2);
3955}
3956
3957static int
3958load_setitems(UnpicklerObject *self)
3959{
3960 return do_setitems(self, marker(self));
3961}
3962
3963static int
3964load_build(UnpicklerObject *self)
3965{
3966 PyObject *state, *inst, *slotstate;
3967 PyObject *setstate;
3968 int status = 0;
3969
3970 /* Stack is ... instance, state. We want to leave instance at
3971 * the stack top, possibly mutated via instance.__setstate__(state).
3972 */
3973 if (self->stack->length < 2)
3974 return stack_underflow();
3975
3976 PDATA_POP(self->stack, state);
3977 if (state == NULL)
3978 return -1;
3979
3980 inst = self->stack->data[self->stack->length - 1];
3981
3982 setstate = PyObject_GetAttrString(inst, "__setstate__");
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00003983 if (setstate == NULL) {
3984 if (PyErr_ExceptionMatches(PyExc_AttributeError))
3985 PyErr_Clear();
Antoine Pitroud79dc622008-09-05 00:03:33 +00003986 else {
3987 Py_DECREF(state);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00003988 return -1;
Antoine Pitroud79dc622008-09-05 00:03:33 +00003989 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003990 }
3991 else {
3992 PyObject *result;
3993
3994 /* The explicit __setstate__ is responsible for everything. */
Antoine Pitroud79dc622008-09-05 00:03:33 +00003995 /* Ugh... this does not leak since unpickler_call() steals the
3996 reference to state first. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003997 result = unpickler_call(self, setstate, state);
3998 Py_DECREF(setstate);
3999 if (result == NULL)
4000 return -1;
4001 Py_DECREF(result);
4002 return 0;
4003 }
4004
4005 /* A default __setstate__. First see whether state embeds a
4006 * slot state dict too (a proto 2 addition).
4007 */
4008 if (PyTuple_Check(state) && Py_SIZE(state) == 2) {
4009 PyObject *tmp = state;
4010
4011 state = PyTuple_GET_ITEM(tmp, 0);
4012 slotstate = PyTuple_GET_ITEM(tmp, 1);
4013 Py_INCREF(state);
4014 Py_INCREF(slotstate);
4015 Py_DECREF(tmp);
4016 }
4017 else
4018 slotstate = NULL;
4019
4020 /* Set inst.__dict__ from the state dict (if any). */
4021 if (state != Py_None) {
4022 PyObject *dict;
Antoine Pitroua9f48a02009-05-02 21:41:14 +00004023 PyObject *d_key, *d_value;
4024 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004025
4026 if (!PyDict_Check(state)) {
4027 PyErr_SetString(UnpicklingError, "state is not a dictionary");
4028 goto error;
4029 }
4030 dict = PyObject_GetAttrString(inst, "__dict__");
4031 if (dict == NULL)
4032 goto error;
4033
Antoine Pitroua9f48a02009-05-02 21:41:14 +00004034 i = 0;
4035 while (PyDict_Next(state, &i, &d_key, &d_value)) {
4036 /* normally the keys for instance attributes are
4037 interned. we should try to do that here. */
4038 Py_INCREF(d_key);
4039 if (PyUnicode_CheckExact(d_key))
4040 PyUnicode_InternInPlace(&d_key);
4041 if (PyObject_SetItem(dict, d_key, d_value) < 0) {
4042 Py_DECREF(d_key);
4043 goto error;
4044 }
4045 Py_DECREF(d_key);
4046 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004047 Py_DECREF(dict);
4048 }
4049
4050 /* Also set instance attributes from the slotstate dict (if any). */
4051 if (slotstate != NULL) {
4052 PyObject *d_key, *d_value;
4053 Py_ssize_t i;
4054
4055 if (!PyDict_Check(slotstate)) {
4056 PyErr_SetString(UnpicklingError,
4057 "slot state is not a dictionary");
4058 goto error;
4059 }
4060 i = 0;
4061 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
4062 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
4063 goto error;
4064 }
4065 }
4066
4067 if (0) {
4068 error:
4069 status = -1;
4070 }
4071
4072 Py_DECREF(state);
4073 Py_XDECREF(slotstate);
4074 return status;
4075}
4076
4077static int
4078load_mark(UnpicklerObject *self)
4079{
4080
4081 /* Note that we split the (pickle.py) stack into two stacks, an
4082 * object stack and a mark stack. Here we push a mark onto the
4083 * mark stack.
4084 */
4085
4086 if ((self->num_marks + 1) >= self->marks_size) {
4087 size_t alloc;
4088 int *marks;
4089
4090 /* Use the size_t type to check for overflow. */
4091 alloc = ((size_t)self->num_marks << 1) + 20;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00004092 if (alloc > PY_SSIZE_T_MAX ||
4093 alloc <= ((size_t)self->num_marks + 1)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004094 PyErr_NoMemory();
4095 return -1;
4096 }
4097
4098 if (self->marks == NULL)
4099 marks = (int *)PyMem_Malloc(alloc * sizeof(int));
4100 else
4101 marks = (int *)PyMem_Realloc(self->marks, alloc * sizeof(int));
4102 if (marks == NULL) {
4103 PyErr_NoMemory();
4104 return -1;
4105 }
4106 self->marks = marks;
4107 self->marks_size = (Py_ssize_t)alloc;
4108 }
4109
4110 self->marks[self->num_marks++] = self->stack->length;
4111
4112 return 0;
4113}
4114
4115static int
4116load_reduce(UnpicklerObject *self)
4117{
4118 PyObject *callable = NULL;
4119 PyObject *argtup = NULL;
4120 PyObject *obj = NULL;
4121
4122 PDATA_POP(self->stack, argtup);
4123 if (argtup == NULL)
4124 return -1;
4125 PDATA_POP(self->stack, callable);
4126 if (callable) {
4127 obj = instantiate(callable, argtup);
4128 Py_DECREF(callable);
4129 }
4130 Py_DECREF(argtup);
4131
4132 if (obj == NULL)
4133 return -1;
4134
4135 PDATA_PUSH(self->stack, obj, -1);
4136 return 0;
4137}
4138
4139/* Just raises an error if we don't know the protocol specified. PROTO
4140 * is the first opcode for protocols >= 2.
4141 */
4142static int
4143load_proto(UnpicklerObject *self)
4144{
4145 char *s;
4146 int i;
4147
4148 if (unpickler_read(self, &s, 1) < 0)
4149 return -1;
4150
4151 i = (unsigned char)s[0];
4152 if (i <= HIGHEST_PROTOCOL)
4153 return 0;
4154
4155 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
4156 return -1;
4157}
4158
/* Run the unpickling loop: read one opcode byte at a time and dispatch to
 * the matching load_* handler until STOP is seen or something fails.
 *
 * The mark stack is reset and any leftover object-stack entries are
 * cleared before starting.  On STOP the object popped from the stack is
 * returned.  Returns NULL if any exception is pending after the loop, and
 * directly from inside the loop for a NUL opcode (EOFError) or an unknown
 * opcode (UnpicklingError).
 */
static PyObject *
load(UnpicklerObject *self)
{
    PyObject *err;
    PyObject *value = NULL;
    char *s;

    self->num_marks = 0;
    if (self->stack->length)
        Pdata_clear(self->stack, 0);

    /* Convenient macros for the dispatch while-switch loop just below.
     * Each expands to a case label.  When the handler succeeds, "continue"
     * goes on to the next opcode.  When it fails, "break" leaves the
     * switch and control reaches the unconditional "break" after the
     * switch, which ends the loop.
     */
#define OP(opcode, load_func) \
    case opcode: if (load_func(self) < 0) break; continue;

#define OP_ARG(opcode, load_func, arg) \
    case opcode: if (load_func(self, (arg)) < 0) break; continue;

    while (1) {
        if (unpickler_read(self, &s, 1) < 0)
            break;

        switch ((enum opcode)s[0]) {
        OP(NONE, load_none)
        OP(BININT, load_binint)
        OP(BININT1, load_binint1)
        OP(BININT2, load_binint2)
        OP(INT, load_int)
        OP(LONG, load_long)
        OP_ARG(LONG1, load_counted_long, 1)
        OP_ARG(LONG4, load_counted_long, 4)
        OP(FLOAT, load_float)
        OP(BINFLOAT, load_binfloat)
        OP(BINBYTES, load_binbytes)
        OP(SHORT_BINBYTES, load_short_binbytes)
        OP(BINSTRING, load_binstring)
        OP(SHORT_BINSTRING, load_short_binstring)
        OP(STRING, load_string)
        OP(UNICODE, load_unicode)
        OP(BINUNICODE, load_binunicode)
        OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
        OP_ARG(TUPLE1, load_counted_tuple, 1)
        OP_ARG(TUPLE2, load_counted_tuple, 2)
        OP_ARG(TUPLE3, load_counted_tuple, 3)
        OP(TUPLE, load_tuple)
        OP(EMPTY_LIST, load_empty_list)
        OP(LIST, load_list)
        OP(EMPTY_DICT, load_empty_dict)
        OP(DICT, load_dict)
        OP(OBJ, load_obj)
        OP(INST, load_inst)
        OP(NEWOBJ, load_newobj)
        OP(GLOBAL, load_global)
        OP(APPEND, load_append)
        OP(APPENDS, load_appends)
        OP(BUILD, load_build)
        OP(DUP, load_dup)
        OP(BINGET, load_binget)
        OP(LONG_BINGET, load_long_binget)
        OP(GET, load_get)
        OP(MARK, load_mark)
        OP(BINPUT, load_binput)
        OP(LONG_BINPUT, load_long_binput)
        OP(PUT, load_put)
        OP(POP, load_pop)
        OP(POP_MARK, load_pop_mark)
        OP(SETITEM, load_setitem)
        OP(SETITEMS, load_setitems)
        OP(PERSID, load_persid)
        OP(BINPERSID, load_binpersid)
        OP(REDUCE, load_reduce)
        OP(PROTO, load_proto)
        OP_ARG(EXT1, load_extension, 1)
        OP_ARG(EXT2, load_extension, 2)
        OP_ARG(EXT4, load_extension, 4)
        OP_ARG(NEWTRUE, load_bool, Py_True)
        OP_ARG(NEWFALSE, load_bool, Py_False)

        /* End of the pickle: leave the switch, then the loop. */
        case STOP:
            break;

        /* A NUL opcode byte is reported as end-of-file. */
        case '\0':
            PyErr_SetNone(PyExc_EOFError);
            return NULL;

        default:
            PyErr_Format(UnpicklingError,
                         "invalid load key, '%c'.", s[0]);
            return NULL;
        }

        break;                  /* and we are done! */
    }

    /* Reached after STOP, a failed read, or a failed handler.  Any pending
     * exception means failure.
     */
    /* XXX: It is not clear what this is actually for. */
    if ((err = PyErr_Occurred())) {
        if (err == PyExc_EOFError) {
            PyErr_SetNone(PyExc_EOFError);
        }
        return NULL;
    }

    PDATA_POP(self->stack, value);
    return value;
}
4264
/* Docstring for the "load" entry of Unpickler_methods.
 * NOTE(review): the summary line has no trailing "\n" of its own, so only
 * one newline separates it from the body text -- confirm whether a blank
 * line was intended.
 */
PyDoc_STRVAR(Unpickler_load_doc,
"load() -> object. Load a pickle."
"\n"
"Read a pickled object representation from the open file object given in\n"
"the constructor, and return the reconstituted object hierarchy specified\n"
"therein.\n");
4271
4272static PyObject *
4273Unpickler_load(UnpicklerObject *self)
4274{
4275 /* Check whether the Unpickler was initialized correctly. This prevents
4276 segfaulting if a subclass overridden __init__ with a function that does
4277 not call Unpickler.__init__(). Here, we simply ensure that self->read
4278 is not NULL. */
4279 if (self->read == NULL) {
4280 PyErr_Format(UnpicklingError,
4281 "Unpickler.__init__() was not called by %s.__init__()",
4282 Py_TYPE(self)->tp_name);
4283 return NULL;
4284 }
4285
4286 return load(self);
4287}
4288
/* The name of find_class() is misleading.  In newer pickle protocols, this
   function is used for loading any global (e.g., functions), not just
   classes.  The name is kept only for backward compatibility. */

/* Docstring for the "find_class" entry of Unpickler_methods. */
PyDoc_STRVAR(Unpickler_find_class_doc,
"find_class(module_name, global_name) -> object.\n"
"\n"
"Return an object from a specified module, importing the module if\n"
"necessary.  Subclasses may override this method (e.g. to restrict\n"
"unpickling of arbitrary classes and functions).\n"
"\n"
"This method is called whenever a class or a function object is\n"
"needed.  Both arguments passed are str objects.\n");
4302
4303static PyObject *
4304Unpickler_find_class(UnpicklerObject *self, PyObject *args)
4305{
4306 PyObject *global;
4307 PyObject *modules_dict;
4308 PyObject *module;
4309 PyObject *module_name, *global_name;
4310
4311 if (!PyArg_UnpackTuple(args, "find_class", 2, 2,
4312 &module_name, &global_name))
4313 return NULL;
4314
4315 modules_dict = PySys_GetObject("modules");
4316 if (modules_dict == NULL)
4317 return NULL;
4318
4319 module = PyDict_GetItem(modules_dict, module_name);
4320 if (module == NULL) {
4321 module = PyImport_Import(module_name);
4322 if (module == NULL)
4323 return NULL;
4324 global = PyObject_GetAttr(module, global_name);
4325 Py_DECREF(module);
4326 }
4327 else {
4328 global = PyObject_GetAttr(module, global_name);
4329 }
4330 return global;
4331}
4332
/* Method table for Unpickler: load() takes no arguments, find_class()
 * takes positional arguments.  Referenced from Unpickler_Type.
 */
static struct PyMethodDef Unpickler_methods[] = {
    {"load", (PyCFunction)Unpickler_load, METH_NOARGS,
     Unpickler_load_doc},
    {"find_class", (PyCFunction)Unpickler_find_class, METH_VARARGS,
     Unpickler_find_class_doc},
    {NULL, NULL}                /* sentinel */
};
4340
/* Deallocator for Unpickler: untrack the object from the GC, drop every
 * owned object reference, free the mark array and the encoding/errors
 * strings, then hand the memory back through tp_free.
 */
static void
Unpickler_dealloc(UnpicklerObject *self)
{
    /* Untrack first so the collector never sees a half-torn-down object. */
    PyObject_GC_UnTrack((PyObject *)self);
    Py_XDECREF(self->readline);
    Py_XDECREF(self->read);
    Py_XDECREF(self->memo);
    Py_XDECREF(self->stack);
    Py_XDECREF(self->pers_func);
    Py_XDECREF(self->arg);
    Py_XDECREF(self->last_string);

    /* marks comes from PyMem_Malloc/PyMem_Realloc (see load_mark);
     * encoding and errors come from strdup() (see Unpickler_init).
     */
    PyMem_Free(self->marks);
    free(self->encoding);
    free(self->errors);

    Py_TYPE(self)->tp_free((PyObject *)self);
}
4359
/* GC traversal for Unpickler: visit every object reference the instance
 * owns.  Py_VISIT uses the parameters named visit and arg.
 */
static int
Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
{
    Py_VISIT(self->readline);
    Py_VISIT(self->read);
    Py_VISIT(self->memo);
    Py_VISIT(self->stack);
    Py_VISIT(self->pers_func);
    Py_VISIT(self->arg);
    Py_VISIT(self->last_string);
    return 0;
}
4372
4373static int
4374Unpickler_clear(UnpicklerObject *self)
4375{
4376 Py_CLEAR(self->readline);
4377 Py_CLEAR(self->read);
4378 Py_CLEAR(self->memo);
4379 Py_CLEAR(self->stack);
4380 Py_CLEAR(self->pers_func);
4381 Py_CLEAR(self->arg);
4382 Py_CLEAR(self->last_string);
4383
4384 PyMem_Free(self->marks);
4385 self->marks = NULL;
4386 free(self->encoding);
4387 self->encoding = NULL;
4388 free(self->errors);
4389 self->errors = NULL;
4390
4391 return 0;
4392}
4393
/* Class docstring for Unpickler, installed as tp_doc of Unpickler_Type.
 * NOTE(review): the signature line has no trailing "\n" of its own, so
 * only one newline separates it from the description -- confirm whether a
 * blank line was intended.
 */
PyDoc_STRVAR(Unpickler_doc,
"Unpickler(file, *, encoding='ASCII', errors='strict')"
"\n"
"This takes a binary file for reading a pickle data stream.\n"
"\n"
"The protocol version of the pickle is detected automatically, so no\n"
"proto argument is needed.\n"
"\n"
"The file-like object must have two methods, a read() method\n"
"that takes an integer argument, and a readline() method that\n"
"requires no arguments.  Both methods should return bytes.\n"
"Thus file-like object can be a binary file object opened for\n"
"reading, a BytesIO object, or any other custom object that\n"
"meets this interface.\n"
"\n"
"Optional keyword arguments are encoding and errors, which are\n"
"used to decode 8-bit string instances pickled by Python 2.x.\n"
"These default to 'ASCII' and 'strict', respectively.\n");
4412
4413static int
4414Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
4415{
4416 static char *kwlist[] = {"file", "encoding", "errors", 0};
4417 PyObject *file;
4418 char *encoding = NULL;
4419 char *errors = NULL;
4420
4421 /* XXX: That is an horrible error message. But, I don't know how to do
4422 better... */
4423 if (Py_SIZE(args) != 1) {
4424 PyErr_Format(PyExc_TypeError,
4425 "%s takes exactly one positional argument (%zd given)",
4426 Py_TYPE(self)->tp_name, Py_SIZE(args));
4427 return -1;
4428 }
4429
4430 /* Arguments parsing needs to be done in the __init__() method to allow
4431 subclasses to define their own __init__() method, which may (or may
4432 not) support Unpickler arguments. However, this means we need to be
4433 extra careful in the other Unpickler methods, since a subclass could
4434 forget to call Unpickler.__init__() thus breaking our internal
4435 invariants. */
4436 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|ss:Unpickler", kwlist,
4437 &file, &encoding, &errors))
4438 return -1;
4439
4440 /* In case of multiple __init__() calls, clear previous content. */
4441 if (self->read != NULL)
4442 (void)Unpickler_clear(self);
4443
4444 self->read = PyObject_GetAttrString(file, "read");
4445 self->readline = PyObject_GetAttrString(file, "readline");
4446 if (self->readline == NULL || self->read == NULL)
4447 return -1;
4448
4449 if (encoding == NULL)
4450 encoding = "ASCII";
4451 if (errors == NULL)
4452 errors = "strict";
4453
4454 self->encoding = strdup(encoding);
4455 self->errors = strdup(errors);
4456 if (self->encoding == NULL || self->errors == NULL) {
4457 PyErr_NoMemory();
4458 return -1;
4459 }
4460
4461 if (PyObject_HasAttrString((PyObject *)self, "persistent_load")) {
4462 self->pers_func = PyObject_GetAttrString((PyObject *)self,
4463 "persistent_load");
4464 if (self->pers_func == NULL)
4465 return -1;
4466 }
4467 else {
4468 self->pers_func = NULL;
4469 }
4470
4471 self->stack = (Pdata *)Pdata_New();
4472 if (self->stack == NULL)
4473 return -1;
4474
4475 self->memo = PyDict_New();
4476 if (self->memo == NULL)
4477 return -1;
4478
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00004479 self->last_string = NULL;
4480 self->arg = NULL;
4481
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004482 return 0;
4483}
4484
4485static PyObject *
4486Unpickler_get_memo(UnpicklerObject *self)
4487{
4488 if (self->memo == NULL)
4489 PyErr_SetString(PyExc_AttributeError, "memo");
4490 else
4491 Py_INCREF(self->memo);
4492 return self->memo;
4493}
4494
4495static int
4496Unpickler_set_memo(UnpicklerObject *self, PyObject *value)
4497{
4498 PyObject *tmp;
4499
4500 if (value == NULL) {
4501 PyErr_SetString(PyExc_TypeError,
4502 "attribute deletion is not supported");
4503 return -1;
4504 }
4505 if (!PyDict_Check(value)) {
4506 PyErr_SetString(PyExc_TypeError, "memo must be a dictionary");
4507 return -1;
4508 }
4509
4510 tmp = self->memo;
4511 Py_INCREF(value);
4512 self->memo = value;
4513 Py_XDECREF(tmp);
4514
4515 return 0;
4516}
4517
4518static PyObject *
4519Unpickler_get_persload(UnpicklerObject *self)
4520{
4521 if (self->pers_func == NULL)
4522 PyErr_SetString(PyExc_AttributeError, "persistent_load");
4523 else
4524 Py_INCREF(self->pers_func);
4525 return self->pers_func;
4526}
4527
4528static int
4529Unpickler_set_persload(UnpicklerObject *self, PyObject *value)
4530{
4531 PyObject *tmp;
4532
4533 if (value == NULL) {
4534 PyErr_SetString(PyExc_TypeError,
4535 "attribute deletion is not supported");
4536 return -1;
4537 }
4538 if (!PyCallable_Check(value)) {
4539 PyErr_SetString(PyExc_TypeError,
4540 "persistent_load must be a callable taking "
4541 "one argument");
4542 return -1;
4543 }
4544
4545 tmp = self->pers_func;
4546 Py_INCREF(value);
4547 self->pers_func = value;
4548 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
4549
4550 return 0;
4551}
4552
/* Computed attributes of Unpickler: "memo" and "persistent_load", each
 * with a getter and a setter.  Referenced from Unpickler_Type.
 */
static PyGetSetDef Unpickler_getsets[] = {
    {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
    {"persistent_load", (getter)Unpickler_get_persload,
     (setter)Unpickler_set_persload},
    {NULL}                      /* sentinel */
};
4559
/* Type object for _pickle.Unpickler.
 *
 * The initializer is positional, so entries must stay in slot order.  The
 * type can be subclassed (Py_TPFLAGS_BASETYPE) and takes part in cyclic
 * GC (Py_TPFLAGS_HAVE_GC with the traverse/clear functions below).
 */
static PyTypeObject Unpickler_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Unpickler",                /*tp_name*/
    sizeof(UnpicklerObject),            /*tp_basicsize*/
    0,                                  /*tp_itemsize*/
    (destructor)Unpickler_dealloc,      /*tp_dealloc*/
    0,                                  /*tp_print*/
    0,                                  /*tp_getattr*/
    0,                                  /*tp_setattr*/
    0,                                  /*tp_reserved*/
    0,                                  /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    0,                                  /*tp_as_mapping*/
    0,                                  /*tp_hash*/
    0,                                  /*tp_call*/
    0,                                  /*tp_str*/
    0,                                  /*tp_getattro*/
    0,                                  /*tp_setattro*/
    0,                                  /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
    Unpickler_doc,                      /*tp_doc*/
    (traverseproc)Unpickler_traverse,   /*tp_traverse*/
    (inquiry)Unpickler_clear,           /*tp_clear*/
    0,                                  /*tp_richcompare*/
    0,                                  /*tp_weaklistoffset*/
    0,                                  /*tp_iter*/
    0,                                  /*tp_iternext*/
    Unpickler_methods,                  /*tp_methods*/
    0,                                  /*tp_members*/
    Unpickler_getsets,                  /*tp_getset*/
    0,                                  /*tp_base*/
    0,                                  /*tp_dict*/
    0,                                  /*tp_descr_get*/
    0,                                  /*tp_descr_set*/
    0,                                  /*tp_dictoffset*/
    (initproc)Unpickler_init,           /*tp_init*/
    PyType_GenericAlloc,                /*tp_alloc*/
    PyType_GenericNew,                  /*tp_new*/
    PyObject_GC_Del,                    /*tp_free*/
    0,                                  /*tp_is_gc*/
};
4602
4603static int
4604init_stuff(void)
4605{
4606 PyObject *copyreg;
4607
4608 copyreg = PyImport_ImportModule("copyreg");
4609 if (!copyreg)
4610 return -1;
4611
4612 dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
4613 if (!dispatch_table)
4614 goto error;
4615
4616 extension_registry = \
4617 PyObject_GetAttrString(copyreg, "_extension_registry");
4618 if (!extension_registry)
4619 goto error;
4620
4621 inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry");
4622 if (!inverted_registry)
4623 goto error;
4624
4625 extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
4626 if (!extension_cache)
4627 goto error;
4628
4629 Py_DECREF(copyreg);
4630
4631 empty_tuple = PyTuple_New(0);
4632 if (empty_tuple == NULL)
4633 return -1;
4634
4635 two_tuple = PyTuple_New(2);
4636 if (two_tuple == NULL)
4637 return -1;
4638 /* We use this temp container with no regard to refcounts, or to
4639 * keeping containees alive. Exempt from GC, because we don't
4640 * want anything looking at two_tuple() by magic.
4641 */
4642 PyObject_GC_UnTrack(two_tuple);
4643
4644 return 0;
4645
4646 error:
4647 Py_DECREF(copyreg);
4648 return -1;
4649}
4650
/* Module definition passed to PyModule_Create() in PyInit__pickle().
 * Only the name and docstring are filled in; the remaining positional
 * slots are left at -1/NULL (see the PyModuleDef documentation for what
 * each slot means).
 */
static struct PyModuleDef _picklemodule = {
    PyModuleDef_HEAD_INIT,
    "_pickle",                  /* module name */
    pickle_module_doc,          /* module docstring */
    -1,
    NULL,
    NULL,
    NULL,
    NULL,
    NULL
};
4662
4663PyMODINIT_FUNC
4664PyInit__pickle(void)
4665{
4666 PyObject *m;
4667
4668 if (PyType_Ready(&Unpickler_Type) < 0)
4669 return NULL;
4670 if (PyType_Ready(&Pickler_Type) < 0)
4671 return NULL;
4672 if (PyType_Ready(&Pdata_Type) < 0)
4673 return NULL;
4674
4675 /* Create the module and add the functions. */
4676 m = PyModule_Create(&_picklemodule);
4677 if (m == NULL)
4678 return NULL;
4679
4680 if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
4681 return NULL;
4682 if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
4683 return NULL;
4684
4685 /* Initialize the exceptions. */
4686 PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
4687 if (PickleError == NULL)
4688 return NULL;
4689 PicklingError = \
4690 PyErr_NewException("_pickle.PicklingError", PickleError, NULL);
4691 if (PicklingError == NULL)
4692 return NULL;
4693 UnpicklingError = \
4694 PyErr_NewException("_pickle.UnpicklingError", PickleError, NULL);
4695 if (UnpicklingError == NULL)
4696 return NULL;
4697
4698 if (PyModule_AddObject(m, "PickleError", PickleError) < 0)
4699 return NULL;
4700 if (PyModule_AddObject(m, "PicklingError", PicklingError) < 0)
4701 return NULL;
4702 if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0)
4703 return NULL;
4704
4705 if (init_stuff() < 0)
4706 return NULL;
4707
4708 return m;
4709}