blob: 8be7a8d362120ba595ca84d78f62ee2a65358569 [file] [log] [blame]
"""Create portable serialized representations of Python objects.

See module cPickle for a (much) faster implementation.
See module copy_reg for a mechanism for registering custom picklers.

Classes:

    Pickler
    Unpickler

Functions:

    dump(object, file)
    dumps(object) -> string
    load(file) -> object
    loads(string) -> object

Misc variables:

    __version__
    format_version
    compatible_formats

"""
25
Guido van Rossum743d17e1998-09-15 20:25:57 +000026__version__ = "$Revision$" # Code version
Guido van Rossuma48061a1995-01-10 00:31:14 +000027
28from types import *
Guido van Rossum4fb5b281997-09-12 20:07:24 +000029from copy_reg import dispatch_table, safe_constructors
Guido van Rossumd3703791998-10-22 20:15:36 +000030import marshal
31import sys
32import struct
Skip Montanaro23bafc62001-02-18 03:10:09 +000033import re
Guido van Rossuma48061a1995-01-10 00:31:14 +000034
# Public names exported by "from pickle import *".  The opcode constants
# are appended further down, once they have been defined.
__all__ = [
    "PickleError",
    "PicklingError",
    "UnpicklingError",
    "Pickler",
    "Unpickler",
    "dump",
    "dumps",
    "load",
    "loads",
]
37
# File format version we write.
format_version = "1.3"
# Old format versions we can read.
compatible_formats = ["1.0", "1.1", "1.2"]

# Short aliases: marshal is used to produce and parse the little-endian
# integer fields of the binary opcodes.
mdumps, mloads = marshal.dumps, marshal.loads
Guido van Rossum0c891ce1995-03-14 15:09:05 +000043
class PickleError(Exception):
    """Common base class for the exceptions defined by this module."""


class PicklingError(PickleError):
    """Raised by Pickler when an object cannot be pickled."""


class UnpicklingError(PickleError):
    """Raised by Unpickler when a pickle cannot be safely loaded."""


class _Stop(Exception):
    """Internal signal that carries the finished object out of the load loop.

    Unpickler.load() catches it and returns its ``value`` attribute.
    """

    def __init__(self, value):
        self.value = value
51
# PyStringMap is importable only when org.python.core is available; when
# the import fails the name is bound to None so later code can test for it.
try:
    from org.python.core import PyStringMap
except ImportError:
    PyStringMap = None
56
# Pickle opcodes.  Each one is a single character in the pickle stream;
# Unpickler.dispatch maps the character to the method that handles it.
MARK            = '('   # push the mark object
STOP            = '.'   # end of pickle
POP             = '0'   # discard the top stack item
POP_MARK        = '1'   # discard the stack down to and including the mark
DUP             = '2'   # duplicate the top stack item
FLOAT           = 'F'   # float, as a text line
INT             = 'I'   # int, as a text line
BININT          = 'J'   # 4-byte signed int
BININT1         = 'K'   # 1-byte unsigned int
LONG            = 'L'   # long, as a text line
BININT2         = 'M'   # 2-byte unsigned int
NONE            = 'N'   # None
PERSID          = 'P'   # persistent id, as a text line
BINPERSID       = 'Q'   # persistent id, taken from the stack
REDUCE          = 'R'   # apply a callable to an argument tuple
STRING          = 'S'   # string, as a repr() text line
BINSTRING       = 'T'   # string with a 4-byte length
SHORT_BINSTRING = 'U'   # string with a 1-byte length
UNICODE         = 'V'   # unicode, raw-unicode-escape text line
BINUNICODE      = 'X'   # unicode, UTF-8 with a 4-byte length
APPEND          = 'a'   # append the top item to the list below it
BUILD           = 'b'   # apply state to the object below it
GLOBAL          = 'c'   # module-level object named by two text lines
DICT            = 'd'   # build a dict from the items above the mark
EMPTY_DICT      = '}'   # push an empty dict
APPENDS         = 'e'   # append every item above the mark to a list
GET             = 'g'   # push a memo entry; key as a text line
BINGET          = 'h'   # push a memo entry; 1-byte key
INST            = 'i'   # build a class instance (class named in text)
LONG_BINGET     = 'j'   # push a memo entry; 4-byte key
LIST            = 'l'   # build a list from the items above the mark
EMPTY_LIST      = ']'   # push an empty list
OBJ             = 'o'   # build a class instance (class on the stack)
PUT             = 'p'   # store the top item in the memo; key as text
BINPUT          = 'q'   # store the top item in the memo; 1-byte key
LONG_BINPUT     = 'r'   # store the top item in the memo; 4-byte key
SETITEM         = 's'   # add one key/value pair to a dict
TUPLE           = 't'   # build a tuple from the items above the mark
EMPTY_TUPLE     = ')'   # push an empty tuple
SETITEMS        = 'u'   # add every key/value pair above the mark to a dict
BINFLOAT        = 'G'   # float as 8 big-endian bytes
Guido van Rossuma48061a1995-01-10 00:31:14 +000098
# Export every module-level name made up solely of capitals, digits and
# underscores (at least two characters) -- i.e. the opcode constants.
__all__.extend([x for x in dir()
                if re.match("[A-Z][A-Z0-9_]+$", x) is not None])
100
class Pickler:
    """Write pickled representations of objects to a file-like object.

    The memo maps id(obj) to a (memo_index, obj) pair.  Holding a
    reference to obj keeps it alive, so its id cannot be reused by
    another object while pickling is still in progress.
    """

    def __init__(self, file, bin = 0):
        """Prepare to pickle to *file* (anything with a write() method).

        A true *bin* selects the binary opcodes; otherwise the text
        opcodes are written.
        """
        self.write = file.write
        self.memo = {}
        self.bin = bin

    def dump(self, object):
        """Pickle *object* followed by the STOP opcode."""
        self.save(object)
        self.write(STOP)

    def put(self, i):
        """Return the opcode string that stores the stack top as memo *i*."""
        if not self.bin:
            return PUT + repr(i) + '\n'

        s = mdumps(i)[1:]
        if i < 256:
            return BINPUT + s[0]
        return LONG_BINPUT + s

    def get(self, i):
        """Return the opcode string that pushes memo entry *i*."""
        if not self.bin:
            return GET + repr(i) + '\n'

        s = mdumps(i)[1:]
        if i < 256:
            return BINGET + s[0]
        return LONG_BINGET + s

    def save(self, object, pers_save = 0):
        """Pickle *object* by dispatching on its type.

        A true *pers_save* skips the persistent_id() hook; save_pers()
        uses it when pickling the persistent id itself.  Raises
        PicklingError when the object cannot be pickled or its reduce
        function returns a malformed value.
        """
        memo = self.memo

        if not pers_save:
            pid = self.persistent_id(object)
            if pid is not None:
                self.save_pers(pid)
                return

        d = id(object)
        t = type(object)

        # The empty tuple is never memoized, so handle it before the
        # memo lookup.
        if t is TupleType and len(object) == 0:
            if self.bin:
                self.save_empty_tuple(object)
            else:
                self.save_tuple(object)
            return

        if d in memo:
            self.write(self.get(memo[d][0]))
            return

        try:
            f = self.dispatch[t]
        except KeyError:
            pass
        else:
            f(self, object)
            return

        # No built-in handler: try the instance persistent-id hook, then
        # copy_reg's dispatch_table, then the object's own __reduce__.
        pid = self.inst_persistent_id(object)
        if pid is not None:
            self.save_pers(pid)
            return

        try:
            reduce = dispatch_table[t]
        except KeyError:
            try:
                reduce = object.__reduce__
            except AttributeError:
                raise PicklingError(
                    "can't pickle %s object: %s" % (repr(t.__name__),
                                                    repr(object)))
            else:
                tup = reduce()
        else:
            tup = reduce(object)

        if type(tup) is StringType:
            self.save_global(object, tup)
            return

        if type(tup) is not TupleType:
            raise PicklingError("Value returned by %s must be a "
                                "tuple" % reduce)

        l = len(tup)
        if l != 2 and l != 3:
            raise PicklingError("tuple returned by %s must contain "
                                "only two or three elements" % reduce)

        callable = tup[0]
        arg_tup = tup[1]
        if l > 2:
            state = tup[2]
        else:
            state = None

        if type(arg_tup) is not TupleType and arg_tup is not None:
            raise PicklingError("Second element of tuple returned "
                                "by %s must be a tuple" % reduce)

        self.save_reduce(callable, arg_tup, state)
        memo_len = len(memo)
        self.write(self.put(memo_len))
        memo[d] = (memo_len, object)

    def persistent_id(self, object):
        """Hook: return a persistent id for *object*, or None (default)."""
        return None

    def inst_persistent_id(self, object):
        """Hook consulted for objects whose type has no dispatch entry."""
        return None

    def save_pers(self, pid):
        """Write the persistent id *pid*."""
        if self.bin:
            self.save(pid, 1)
            self.write(BINPERSID)
        else:
            self.write(PERSID + str(pid) + '\n')

    def save_reduce(self, callable, arg_tup, state = None):
        """Write a REDUCE of *callable* and *arg_tup*, then BUILD if *state*."""
        write = self.write
        save = self.save

        save(callable)
        save(arg_tup)
        write(REDUCE)

        if state is not None:
            save(state)
            write(BUILD)

    # Maps a type object to the unbound method that pickles it.
    dispatch = {}

    def save_none(self, object):
        self.write(NONE)
    dispatch[NoneType] = save_none

    def save_int(self, object):
        if self.bin:
            # If the int is small enough to fit in a signed 4-byte 2's-comp
            # format, we can store it more efficiently than the general
            # case.
            high_bits = object >> 31  # note that Python shift sign-extends
            if high_bits == 0 or high_bits == -1:
                # All high bits are copies of bit 2**31, so the value
                # fits in a 4-byte signed int.
                i = mdumps(object)[1:]
                assert len(i) == 4
                if i[-2:] == '\000\000':    # fits in 2-byte unsigned int
                    if i[-3] == '\000':     # fits in 1-byte unsigned int
                        self.write(BININT1 + i[0])
                    else:
                        self.write(BININT2 + i[:2])
                else:
                    self.write(BININT + i)
                return
        # Text pickle, or int too big to fit in signed 4-byte format.
        self.write(INT + repr(object) + '\n')
    dispatch[IntType] = save_int

    def save_long(self, object):
        self.write(LONG + repr(object) + '\n')
    dispatch[LongType] = save_long

    def save_float(self, object, pack=struct.pack):
        if self.bin:
            self.write(BINFLOAT + pack('>d', object))
        else:
            self.write(FLOAT + repr(object) + '\n')
    dispatch[FloatType] = save_float

    def save_string(self, object):
        d = id(object)
        memo = self.memo

        if self.bin:
            l = len(object)
            s = mdumps(l)[1:]
            if l < 256:
                self.write(SHORT_BINSTRING + s[0] + object)
            else:
                self.write(BINSTRING + s + object)
        else:
            self.write(STRING + repr(object) + '\n')

        memo_len = len(memo)
        self.write(self.put(memo_len))
        memo[d] = (memo_len, object)
    dispatch[StringType] = save_string

    def save_unicode(self, object):
        d = id(object)
        memo = self.memo

        if self.bin:
            encoding = object.encode('utf-8')
            l = len(encoding)
            s = mdumps(l)[1:]
            self.write(BINUNICODE + s + encoding)
        else:
            # Escape into a separate name: the memo entry below must hold
            # the original object, whose id is the memo key.
            escaped = object.replace(u"\\", u"\\u005c")
            escaped = escaped.replace(u"\n", u"\\u000a")
            self.write(UNICODE + escaped.encode('raw-unicode-escape') + '\n')

        memo_len = len(memo)
        self.write(self.put(memo_len))
        memo[d] = (memo_len, object)
    dispatch[UnicodeType] = save_unicode

    if StringType == UnicodeType:
        # This is true for Jython
        def save_string(self, object):
            d = id(object)
            memo = self.memo
            is_unicode = object.isunicode()

            # 'data' is what gets written; 'object' stays bound to the
            # original so the memo entry keeps the right object alive.
            if self.bin:
                data = object
                if is_unicode:
                    data = object.encode("utf-8")
                l = len(data)
                s = mdumps(l)[1:]
                if l < 256 and not is_unicode:
                    self.write(SHORT_BINSTRING + s[0] + data)
                elif is_unicode:
                    self.write(BINUNICODE + s + data)
                else:
                    self.write(BINSTRING + s + data)
            else:
                if is_unicode:
                    data = object.replace(u"\\", u"\\u005c")
                    data = data.replace(u"\n", u"\\u000a")
                    data = data.encode('raw-unicode-escape')
                    self.write(UNICODE + data + '\n')
                else:
                    self.write(STRING + repr(object) + '\n')

            memo_len = len(memo)
            self.write(self.put(memo_len))
            memo[d] = (memo_len, object)
        dispatch[StringType] = save_string

    def save_tuple(self, object):
        write = self.write
        save = self.save
        memo = self.memo

        d = id(object)

        write(MARK)

        for element in object:
            save(element)

        if len(object) and d in memo:
            # Saving the elements memoized this same tuple (it is
            # recursive): discard what was pushed and fetch the memo entry.
            if self.bin:
                write(POP_MARK + self.get(memo[d][0]))
                return

            write(POP * (len(object) + 1) + self.get(memo[d][0]))
            return

        memo_len = len(memo)
        self.write(TUPLE + self.put(memo_len))
        memo[d] = (memo_len, object)
    dispatch[TupleType] = save_tuple

    def save_empty_tuple(self, object):
        self.write(EMPTY_TUPLE)

    def save_list(self, object):
        d = id(object)

        write = self.write
        save = self.save
        memo = self.memo

        if self.bin:
            write(EMPTY_LIST)
        else:
            write(MARK + LIST)

        # Memoize before saving the elements so that a list containing
        # itself is written as a memo reference.
        memo_len = len(memo)
        write(self.put(memo_len))
        memo[d] = (memo_len, object)

        using_appends = (self.bin and (len(object) > 1))

        if using_appends:
            write(MARK)

        for element in object:
            save(element)

            if not using_appends:
                write(APPEND)

        if using_appends:
            write(APPENDS)
    dispatch[ListType] = save_list

    def save_dict(self, object):
        d = id(object)

        write = self.write
        save = self.save
        memo = self.memo

        if self.bin:
            write(EMPTY_DICT)
        else:
            write(MARK + DICT)

        memo_len = len(memo)
        self.write(self.put(memo_len))
        memo[d] = (memo_len, object)

        using_setitems = (self.bin and (len(object) > 1))

        if using_setitems:
            write(MARK)

        items = object.items()
        for key, value in items:
            save(key)
            save(value)

            if not using_setitems:
                write(SETITEM)

        if using_setitems:
            write(SETITEMS)

    dispatch[DictionaryType] = save_dict
    if not PyStringMap is None:
        dispatch[PyStringMap] = save_dict

    def save_inst(self, object):
        d = id(object)
        cls = object.__class__

        memo = self.memo
        write = self.write
        save = self.save

        if hasattr(object, '__getinitargs__'):
            args = object.__getinitargs__()
            len(args) # XXX Assert it's a sequence
            _keep_alive(args, memo)
        else:
            args = ()

        write(MARK)

        if self.bin:
            save(cls)

        for arg in args:
            save(arg)

        memo_len = len(memo)
        if self.bin:
            write(OBJ + self.put(memo_len))
        else:
            write(INST + cls.__module__ + '\n' + cls.__name__ + '\n' +
                  self.put(memo_len))

        memo[d] = (memo_len, object)

        try:
            getstate = object.__getstate__
        except AttributeError:
            stuff = object.__dict__
        else:
            stuff = getstate()
            _keep_alive(stuff, memo)
        save(stuff)
        write(BUILD)
    dispatch[InstanceType] = save_inst

    def save_global(self, object, name = None):
        """Pickle *object* by reference, as module name plus attribute name.

        Raises PicklingError unless importing the module and fetching
        *name* from it yields this very object.
        """
        write = self.write
        memo = self.memo

        if name is None:
            name = object.__name__

        try:
            module = object.__module__
        except AttributeError:
            module = whichmodule(object, name)

        try:
            __import__(module)
            mod = sys.modules[module]
            klass = getattr(mod, name)
        except (ImportError, KeyError, AttributeError):
            raise PicklingError(
                "Can't pickle %r: it's not found as %s.%s" %
                (object, module, name))
        else:
            if klass is not object:
                raise PicklingError(
                    "Can't pickle %r: it's not the same object as %s.%s" %
                    (object, module, name))

        memo_len = len(memo)
        write(GLOBAL + module + '\n' + name + '\n' +
              self.put(memo_len))
        memo[id(object)] = (memo_len, object)
    dispatch[ClassType] = save_global
    dispatch[FunctionType] = save_global
    dispatch[BuiltinFunctionType] = save_global
    dispatch[TypeType] = save_global
Guido van Rossum0c891ce1995-03-14 15:09:05 +0000522
Guido van Rossuma48061a1995-01-10 00:31:14 +0000523
Guido van Rossum5ed5c4c1997-09-03 00:23:54 +0000524def _keep_alive(x, memo):
525 """Keeps a reference to the object x in the memo.
526
527 Because we remember objects by their id, we have
528 to assure that possibly temporary objects are kept
529 alive by referencing them.
530 We store a reference at the id of the memo, which should
531 normally not be used unless someone tries to deepcopy
532 the memo itself...
533 """
534 try:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000535 memo[id(memo)].append(x)
Guido van Rossum5ed5c4c1997-09-03 00:23:54 +0000536 except KeyError:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000537 # aha, this is the first one :-)
538 memo[id(memo)]=[x]
Guido van Rossum5ed5c4c1997-09-03 00:23:54 +0000539
540
# Cache for whichmodule(): maps an object to the name of its module.
classmap = {}

# This is no longer used to find classes, but still for functions
def whichmodule(cls, clsname):
    """Figure out the module in which a class occurs.

    Look through sys.modules for a module other than __main__ whose
    attribute *clsname* is *cls* itself.  The answer is cached in
    classmap.  Return the module name, or '__main__' when no module
    qualifies.
    """
    try:
        return classmap[cls]
    except KeyError:
        pass

    found = '__main__'
    for name, module in sys.modules.items():
        if name == '__main__':
            continue
        if hasattr(module, clsname) and getattr(module, clsname) is cls:
            found = name
            break

    classmap[cls] = found
    return found
Guido van Rossuma48061a1995-01-10 00:31:14 +0000564
565
566class Unpickler:
567
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000568 def __init__(self, file):
569 self.readline = file.readline
570 self.read = file.read
571 self.memo = {}
Guido van Rossuma48061a1995-01-10 00:31:14 +0000572
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000573 def load(self):
574 self.mark = ['spam'] # Any new unique object
575 self.stack = []
576 self.append = self.stack.append
577 read = self.read
578 dispatch = self.dispatch
579 try:
580 while 1:
581 key = read(1)
582 dispatch[key](self)
Guido van Rossumff871742000-12-13 18:11:56 +0000583 except _Stop, stopinst:
584 return stopinst.value
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000585
def marker(self):
    """Return the stack index of the topmost mark object."""
    stack, mark = self.stack, self.mark
    k = len(stack) - 1
    # Walk down from the top; the mark is recognized by identity.
    while stack[k] is not mark:
        k -= 1
    return k
592
593 dispatch = {}
594
def load_eof(self):
    """Handler for the empty string read() returns at end of file."""
    raise EOFError()
597 dispatch[''] = load_eof
598
def load_persid(self):
    """Read a persistent id as a text line and push the resolved object."""
    line = self.readline()
    pid = line[:-1]    # drop the trailing newline
    obj = self.persistent_load(pid)
    self.append(obj)
602 dispatch[PERSID] = load_persid
603
def load_binpersid(self):
    """Replace the persistent id on top of the stack by the resolved object."""
    pid = self.stack.pop()
    self.append(self.persistent_load(pid))
611 dispatch[BINPERSID] = load_binpersid
612
def load_none(self):
    """Push None."""
    push = self.append
    push(None)
615 dispatch[NONE] = load_none
616
def load_int(self):
    """Read an integer written as a text line and push it."""
    text = self.readline()[:-1]    # drop the trailing newline
    self.append(int(text))
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000619 dispatch[INT] = load_int
620
def load_binint(self):
    """Read a 4-byte integer and push it."""
    # marshal decodes the four bytes when they follow its 'i' type code.
    raw = self.read(4)
    self.append(mloads('i' + raw))
623 dispatch[BININT] = load_binint
624
def load_binint1(self):
    """Read a 1-byte unsigned integer and push it."""
    # Pad with three zero bytes to the 4-byte form marshal expects.
    raw = self.read(1)
    self.append(mloads('i' + raw + '\000\000\000'))
627 dispatch[BININT1] = load_binint1
628
def load_binint2(self):
    """Read a 2-byte unsigned integer and push it."""
    # Pad with two zero bytes to the 4-byte form marshal expects.
    raw = self.read(2)
    self.append(mloads('i' + raw + '\000\000'))
631 dispatch[BININT2] = load_binint2
Tim Peters2344fae2001-01-15 00:50:52 +0000632
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000633 def load_long(self):
Guido van Rossumff871742000-12-13 18:11:56 +0000634 self.append(long(self.readline()[:-1], 0))
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000635 dispatch[LONG] = load_long
636
def load_float(self):
    """Read a float written as a text line and push it."""
    text = self.readline()[:-1]    # drop the trailing newline
    self.append(float(text))
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000639 dispatch[FLOAT] = load_float
640
def load_binfloat(self, unpack=struct.unpack):
    """Read an 8-byte big-endian double and push it."""
    raw = self.read(8)
    (value,) = unpack('>d', raw)
    self.append(value)
643 dispatch[BINFLOAT] = load_binfloat
644
def load_string(self):
    """Read a string written as a quoted literal and push it.

    Raises ValueError when the line is not accepted by
    _is_string_secure().
    """
    rep = self.readline()[:-1]
    if not self._is_string_secure(rep):
        raise ValueError("insecure string pickle")
    # NOTE(review): this eval()s text taken from the pickle.  The check
    # above and the empty __builtins__ are the only safeguards.
    no_builtins = {'__builtins__': {}} # Let's be careful
    self.append(eval(rep, no_builtins))
651 dispatch[STRING] = load_string
652
Jeremy Hyltonbe467e52000-09-15 15:14:51 +0000653 def _is_string_secure(self, s):
654 """Return true if s contains a string that is safe to eval
655
656 The definition of secure string is based on the implementation
657 in cPickle. s is secure as long as it only contains a quoted
658 string and optional trailing whitespace.
659 """
660 q = s[0]
661 if q not in ("'", '"'):
662 return 0
663 # find the closing quote
664 offset = 1
665 i = None
666 while 1:
667 try:
668 i = s.index(q, offset)
669 except ValueError:
670 # if there is an error the first time, there is no
671 # close quote
672 if offset == 1:
673 return 0
674 if s[i-1] != '\\':
675 break
676 # check to see if this one is escaped
677 nslash = 0
678 j = i - 1
679 while j >= offset and s[j] == '\\':
680 j = j - 1
681 nslash = nslash + 1
682 if nslash % 2 == 0:
683 break
684 offset = i + 1
685 for c in s[i+1:]:
686 if ord(c) > 32:
687 return 0
688 return 1
689
def load_binstring(self):
    """Read a string prefixed by its 4-byte length and push it."""
    size = mloads('i' + self.read(4))
    self.append(self.read(size))
693 dispatch[BINSTRING] = load_binstring
694
Guido van Rossumb5f2f1b2000-03-10 23:20:09 +0000695 def load_unicode(self):
696 self.append(unicode(self.readline()[:-1],'raw-unicode-escape'))
697 dispatch[UNICODE] = load_unicode
698
def load_binunicode(self):
    """Read UTF-8 data prefixed by its 4-byte length and push it as unicode."""
    size = mloads('i' + self.read(4))
    data = self.read(size)
    self.append(unicode(data, 'utf-8'))
702 dispatch[BINUNICODE] = load_binunicode
703
def load_short_binstring(self):
    """Read a string prefixed by its 1-byte length and push it."""
    # Pad the length byte to the 4-byte form marshal expects.
    size = mloads('i' + self.read(1) + '\000\000\000')
    self.append(self.read(size))
707 dispatch[SHORT_BINSTRING] = load_short_binstring
708
def load_tuple(self):
    """Replace the mark and everything above it by one tuple of those items."""
    k = self.marker()
    items = tuple(self.stack[k+1:])
    self.stack[k:] = [items]
712 dispatch[TUPLE] = load_tuple
713
def load_empty_tuple(self):
    """Push an empty tuple."""
    empty = ()
    self.stack.append(empty)
716 dispatch[EMPTY_TUPLE] = load_empty_tuple
717
def load_empty_list(self):
    """Push a new empty list."""
    empty = []
    self.stack.append(empty)
720 dispatch[EMPTY_LIST] = load_empty_list
721
def load_empty_dictionary(self):
    """Push a new empty dictionary."""
    empty = {}
    self.stack.append(empty)
724 dispatch[EMPTY_DICT] = load_empty_dictionary
725
def load_list(self):
    """Replace the mark and everything above it by one list of those items."""
    k = self.marker()
    items = self.stack[k+1:]
    self.stack[k:] = [items]
729 dispatch[LIST] = load_list
730
def load_dict(self):
    """Replace the mark and the key/value items above it by one dict."""
    k = self.marker()
    items = self.stack[k+1:]
    d = {}
    # Items alternate key, value, key, value, ...
    for i in range(0, len(items), 2):
        d[items[i]] = items[i+1]
    self.stack[k:] = [d]
740 dispatch[DICT] = load_dict
741
742 def load_inst(self):
743 k = self.marker()
744 args = tuple(self.stack[k+1:])
745 del self.stack[k:]
746 module = self.readline()[:-1]
747 name = self.readline()[:-1]
748 klass = self.find_class(module, name)
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000749 instantiated = 0
750 if (not args and type(klass) is ClassType and
751 not hasattr(klass, "__getinitargs__")):
752 try:
753 value = _EmptyClass()
754 value.__class__ = klass
Guido van Rossumb19e2a31998-04-13 18:08:45 +0000755 instantiated = 1
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000756 except RuntimeError:
757 # In restricted execution, assignment to inst.__class__ is
758 # prohibited
759 pass
760 if not instantiated:
Guido van Rossum743d17e1998-09-15 20:25:57 +0000761 try:
762 value = apply(klass, args)
763 except TypeError, err:
764 raise TypeError, "in constructor for %s: %s" % (
765 klass.__name__, str(err)), sys.exc_info()[2]
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000766 self.append(value)
767 dispatch[INST] = load_inst
768
def load_obj(self):
    """Build an instance from the class and arguments above the mark.

    The first item above the mark is the class, the rest are the
    constructor arguments.  A classic class with no arguments and no
    __getinitargs__ is instantiated without running its constructor;
    otherwise the class is called.
    """
    stack = self.stack
    k = self.marker()
    klass = stack[k + 1]
    del stack[k + 1]
    args = tuple(stack[k + 1:])
    del stack[k:]
    instantiated = 0
    if (not args and type(klass) is ClassType and
        not hasattr(klass, "__getinitargs__")):
        try:
            value = _EmptyClass()
            value.__class__ = klass
            instantiated = 1
        except RuntimeError:
            # In restricted execution, assignment to inst.__class__ is
            # prohibited
            pass
    if not instantiated:
        value = klass(*args)
    self.append(value)
Tim Peters2344fae2001-01-15 00:50:52 +0000790 dispatch[OBJ] = load_obj
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000791
def load_global(self):
    """Read a module name and an attribute name and push the object found."""
    readline = self.readline
    module = readline()[:-1]
    name = readline()[:-1]
    self.append(self.find_class(module, name))
797 dispatch[GLOBAL] = load_global
798
def find_class(self, module, name):
    """Import *module* and return its attribute *name*.

    Raises SystemError when the import fails or the attribute is missing.
    """
    try:
        __import__(module)
        klass = getattr(sys.modules[module], name)
    except (ImportError, KeyError, AttributeError):
        raise SystemError("Failed to import class %s from module %s"
                          % (name, module))
    return klass
809
def load_reduce(self):
    """Call the callable below the stack top with the argument tuple on top.

    Anything that is not a classic class must be listed in
    copy_reg.safe_constructors or carry a true __safe_for_unpickling__
    attribute, else UnpicklingError is raised.  An argument tuple of
    None means: call callable.__basicnew__() instead.
    """
    stack = self.stack

    callable, arg_tup = stack[-2], stack[-1]
    del stack[-2:]

    if type(callable) is not ClassType and callable not in safe_constructors:
        safe = getattr(callable, '__safe_for_unpickling__', None)
        if not safe:
            raise UnpicklingError("%s is not safe for "
                                  "unpickling" % callable)

    if arg_tup is None:
        value = callable.__basicnew__()
    else:
        value = callable(*arg_tup)
    self.append(value)
833 dispatch[REDUCE] = load_reduce
Guido van Rossuma48061a1995-01-10 00:31:14 +0000834
def load_pop(self):
    """Discard the top stack item."""
    stack = self.stack
    del stack[-1]
837 dispatch[POP] = load_pop
Guido van Rossum7b5430f1995-03-04 22:25:21 +0000838
def load_pop_mark(self):
    """Discard the topmost mark and everything above it."""
    first = self.marker()
    del self.stack[first:]
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000842 dispatch[POP_MARK] = load_pop_mark
Guido van Rossuma48061a1995-01-10 00:31:14 +0000843
def load_dup(self):
    """Push a second reference to the top stack item."""
    top = self.stack[-1]
    self.append(top)
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000846 dispatch[DUP] = load_dup
Guido van Rossuma48061a1995-01-10 00:31:14 +0000847
def load_get(self):
    """Push the memo entry whose key is given on the next text line."""
    key = self.readline()[:-1]    # memo keys are the decimal strings
    self.append(self.memo[key])
850 dispatch[GET] = load_get
Guido van Rossum78536471996-04-12 13:36:27 +0000851
def load_binget(self):
    """Push the memo entry whose index is given by the next byte."""
    i = mloads('i' + self.read(1) + '\000\000\000')
    # The memo is keyed by the decimal string form of the index.
    key = repr(i)
    self.append(self.memo[key])
855 dispatch[BINGET] = load_binget
Guido van Rossum78536471996-04-12 13:36:27 +0000856
def load_long_binget(self):
    """Push the memo entry whose index is given by the next 4 bytes."""
    i = mloads('i' + self.read(4))
    # The memo is keyed by the decimal string form of the index.
    key = repr(i)
    self.append(self.memo[key])
860 dispatch[LONG_BINGET] = load_long_binget
Guido van Rossum78536471996-04-12 13:36:27 +0000861
def load_put(self):
    """Store the top stack item in the memo under the key on the next line."""
    key = self.readline()[:-1]    # drop the trailing newline
    self.memo[key] = self.stack[-1]
864 dispatch[PUT] = load_put
Guido van Rossuma48061a1995-01-10 00:31:14 +0000865
def load_binput(self):
    # BINPUT: remember the object on top of the stack in the memo under a
    # one-byte index.  The byte is padded with three NUL bytes and given
    # the 'i' type code so mloads can decode it; the memo key is the
    # repr() string of the resulting integer.
    top = self.stack[-1]
    encoded = 'i' + self.read(1) + '\000\000\000'
    index = mloads(encoded)
    self.memo[repr(index)] = top
dispatch[BINPUT] = load_binput
Guido van Rossuma48061a1995-01-10 00:31:14 +0000870
def load_long_binput(self):
    # LONG_BINPUT: like BINPUT, but the memo index is read as four bytes
    # of input.  The memo key is the repr() string of the decoded integer.
    top = self.stack[-1]
    encoded = 'i' + self.read(4)
    index = mloads(encoded)
    self.memo[repr(index)] = top
dispatch[LONG_BINPUT] = load_long_binput
Guido van Rossuma48061a1995-01-10 00:31:14 +0000875
def load_append(self):
    # APPEND: remove the top item from the stack and append it to the
    # object that is then on top (the container stays on the stack).
    stack = self.stack
    item = stack[-1]
    del stack[-1]
    container = stack[-1]
    container.append(item)
dispatch[APPEND] = load_append
Guido van Rossuma48061a1995-01-10 00:31:14 +0000883
def load_appends(self):
    # APPENDS: the container sits just below the index returned by
    # self.marker(); every entry above that index is appended to it in
    # stack order.  Afterwards everything from that index upward is
    # removed, leaving the container on top.
    stack = self.stack
    mark = self.marker()
    container = stack[mark - 1]
    for item in stack[mark + 1:]:
        container.append(item)

    del stack[mark:]
dispatch[APPENDS] = load_appends
Tim Peters2344fae2001-01-15 00:50:52 +0000893
def load_setitem(self):
    # SETITEM: the top two stack entries are a key (lower) and a value
    # (upper).  Remove both and store the pair in the object that is then
    # on top of the stack.
    stack = self.stack
    value = stack[-1]
    key = stack[-2]
    del stack[-2:]
    stack[-1][key] = value
dispatch[SETITEM] = load_setitem
Guido van Rossum0c891ce1995-03-14 15:09:05 +0000902
def load_setitems(self):
    # SETITEMS: the target mapping sits just below the index returned by
    # self.marker(); the entries above that index are alternating keys
    # and values, stored into the mapping pair by pair.  Afterwards
    # everything from that index upward is removed from the stack.
    stack = self.stack
    mark = self.marker()
    mapping = stack[mark - 1]
    pairs = stack[mark + 1:]
    for pos in range(0, len(pairs), 2):
        mapping[pairs[pos]] = pairs[pos + 1]

    del stack[mark:]
dispatch[SETITEMS] = load_setitems
Guido van Rossuma48061a1995-01-10 00:31:14 +0000912
def load_build(self):
    # BUILD: remove the state object from the top of the stack and apply
    # it to the instance that is then on top.
    stack = self.stack
    state = stack[-1]
    del stack[-1]
    inst = stack[-1]

    # Prefer the instance's own __setstate__ hook when it has one.  The
    # call is made outside the try block so that an AttributeError raised
    # inside the hook itself is not mistaken for a missing hook.
    try:
        setstate = inst.__setstate__
    except AttributeError:
        pass
    else:
        setstate(state)
        return

    # No hook: merge the state straight into the instance dictionary.
    try:
        inst.__dict__.update(state)
    except RuntimeError:
        # XXX In restricted execution, the instance's __dict__ is not
        # accessible.  Fall back to the old way of unpickling the
        # instance variables, one setattr() at a time.  This is a
        # semantic difference between unpickling in restricted and
        # unrestricted modes.
        for name, attr in state.items():
            setattr(inst, name, attr)
dispatch[BUILD] = load_build
Guido van Rossuma48061a1995-01-10 00:31:14 +0000933
def load_mark(self):
    # MARK: pass the unpickler's mark object (self.mark) to self.append.
    marker = self.mark
    self.append(marker)
dispatch[MARK] = load_mark
Guido van Rossuma48061a1995-01-10 00:31:14 +0000937
def load_stop(self):
    # STOP: remove the top item from the stack and raise _Stop carrying
    # it.  The exception is the means of delivering the finished value;
    # this method never returns normally.
    stack = self.stack
    result = stack[-1]
    del stack[-1]
    raise _Stop(result)
dispatch[STOP] = load_stop
Guido van Rossuma48061a1995-01-10 00:31:14 +0000943
# Helper class for load_inst/load_obj
#
# A class with no attributes or methods of its own.  The load_inst and
# load_obj handlers are defined elsewhere in this module; presumably they
# create a bare instance of this class and then rebind it to the real
# class -- confirm against those methods.

class _EmptyClass:
    pass
Guido van Rossuma48061a1995-01-10 00:31:14 +0000948
Guido van Rossum0c891ce1995-03-14 15:09:05 +0000949# Shorthands
950
Guido van Rossumc7c5e691996-07-22 22:26:07 +0000951from StringIO import StringIO
952
def dump(object, file, bin=0):
    """Pickle object to file.

    A Pickler is constructed from file and bin and asked to dump object.
    bin is passed through unchanged (presumably a true value selects the
    binary pickle format -- see Pickler).  Returns None.
    """
    pickler = Pickler(file, bin)
    pickler.dump(object)
Guido van Rossum0c891ce1995-03-14 15:09:05 +0000955
def dumps(object, bin=0):
    """Return the pickled form of object as a string.

    The object is pickled into an in-memory StringIO buffer by a Pickler
    constructed with bin, and the buffer's contents are returned.
    """
    buf = StringIO()
    pickler = Pickler(buf, bin)
    pickler.dump(object)
    return buf.getvalue()
Guido van Rossum0c891ce1995-03-14 15:09:05 +0000960
def load(file):
    """Return the object produced by Unpickler(file).load()."""
    unpickler = Unpickler(file)
    return unpickler.load()
Guido van Rossum0c891ce1995-03-14 15:09:05 +0000963
def loads(str):
    """Return the object unpickled from the string str.

    The string is wrapped in a StringIO object so that it can be read
    through an Unpickler like a file.
    """
    unpickler = Unpickler(StringIO(str))
    return unpickler.load()
Guido van Rossum0c891ce1995-03-14 15:09:05 +0000967
968
969# The rest is used for testing only
970
class C:
    # Minimal class for the self-test: instances are compared by the
    # contents of their instance dictionaries.
    def __cmp__(self, other):
        mine = self.__dict__
        theirs = other.__dict__
        return cmp(mine, theirs)
Guido van Rossuma48061a1995-01-10 00:31:14 +0000974
def test():
    # Self-test: pickle a small nested structure to the file 'out' in the
    # current directory, read it back, and print both versions together
    # with the element ids and the memo tables of the Pickler and
    # Unpickler for manual inspection.  The file is left in place.
    fn = 'out'
    c = C()
    c.foo = 1
    c.bar = 2
    x = [0, 1, 2, 3]
    # y repeats the same string and the same instance, and is itself
    # appended twice, so shared references appear in the pickle.
    y = ('abc', 'abc', c, c)
    x.append(y)
    x.append(y)
    x.append(5)

    # Close each file handle even if pickling or unpickling raises; the
    # original leaked the write handle on error and never closed the
    # read handle at all.
    f = open(fn, 'w')
    try:
        F = Pickler(f)
        F.dump(x)
    finally:
        f.close()

    f = open(fn, 'r')
    try:
        U = Unpickler(f)
        x2 = U.load()
    finally:
        f.close()

    # With a single argument, print(...) outputs exactly what the bare
    # print statement would.
    print(x)
    print(x2)
    print(x == x2)
    print(map(id, x))
    print(map(id, x2))
    print(F.memo)
    print(U.memo)
Guido van Rossuma48061a1995-01-10 00:31:14 +0000999
# Run the self-test when this file is executed directly as a script.
if __name__ == '__main__':
    test()