blob: 1b364c6a1847f30482c8fb7758befa3e2740df24 [file] [log] [blame]
Guido van Rossum54f22ed2000-02-04 15:10:34 +00001"""Create portable serialized representations of Python objects.
Guido van Rossuma48061a1995-01-10 00:31:14 +00002
Guido van Rossume467be61997-12-05 19:42:42 +00003See module cPickle for a (much) faster implementation.
4See module copy_reg for a mechanism for registering custom picklers.
Tim Peters22a449a2003-01-27 20:16:36 +00005See module pickletools source for extensive comments.
Guido van Rossuma48061a1995-01-10 00:31:14 +00006
Guido van Rossume467be61997-12-05 19:42:42 +00007Classes:
Guido van Rossuma48061a1995-01-10 00:31:14 +00008
Guido van Rossume467be61997-12-05 19:42:42 +00009 Pickler
10 Unpickler
Guido van Rossuma48061a1995-01-10 00:31:14 +000011
Guido van Rossume467be61997-12-05 19:42:42 +000012Functions:
Guido van Rossuma48061a1995-01-10 00:31:14 +000013
Guido van Rossume467be61997-12-05 19:42:42 +000014 dump(object, file)
15 dumps(object) -> string
16 load(file) -> object
17 loads(string) -> object
Guido van Rossuma48061a1995-01-10 00:31:14 +000018
Guido van Rossume467be61997-12-05 19:42:42 +000019Misc variables:
Guido van Rossuma48061a1995-01-10 00:31:14 +000020
Fred Drakefe82acc1998-02-13 03:24:48 +000021 __version__
Guido van Rossume467be61997-12-05 19:42:42 +000022 format_version
23 compatible_formats
Guido van Rossuma48061a1995-01-10 00:31:14 +000024
Guido van Rossuma48061a1995-01-10 00:31:14 +000025"""
26
__version__ = "$Revision$"       # Code version

from types import *
from copy_reg import dispatch_table, safe_constructors
import marshal
import sys
import struct
import re
import warnings

# Explicit public names; the opcode constants defined further down are
# appended to this list once they exist.
__all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler",
           "Unpickler", "dump", "dumps", "load", "loads"]

# These are purely informational; no code uses these.
format_version = "2.0"                  # File format version we write
compatible_formats = ["1.0",            # Original protocol 0
                      "1.1",            # Protocol 0 with INST added
                      "1.2",            # Original protocol 1
                      "1.3",            # Protocol 1 with BINFLOAT added
                      "2.0",            # Protocol 2
                      ]                 # Old format versions we can read

# Why use struct.pack() for pickling but marshal.loads() for
# unpickling?  struct.pack() is 40% faster than marshal.loads(), but
# marshal.loads() is twice as fast as struct.unpack()!
mloads = marshal.loads
Guido van Rossum0c891ce1995-03-14 15:09:05 +000053
class PickleError(Exception):
    """Root of this module's exception hierarchy.

    PicklingError and UnpicklingError both derive from this class, so
    catching PickleError catches either of them.
    """
57
class PicklingError(PickleError):
    """Raised when an object that cannot be pickled is passed to the
    dump() method.
    """
64
class UnpicklingError(PickleError):
    """Raised when unpickling an object fails, for instance because of a
    security violation.

    Unpickling may raise other exceptions as well, including (but not
    necessarily limited to) AttributeError, EOFError, ImportError and
    IndexError.
    """
Guido van Rossum7849da81995-03-09 14:08:35 +000075
class _Stop(Exception):
    """Private exception that carries a payload in its ``value`` attribute."""

    def __init__(self, value):
        # Exception.__init__ is not invoked here; the payload is simply
        # stored on the instance.
        self.value = value
79
# PyStringMap comes from org.python.core (presumably only importable under
# Jython -- confirm).  When the import fails the name is bound to None so
# that later code can test for its presence.
try:
    from org.python.core import PyStringMap
except ImportError:
    PyStringMap = None

# UnicodeType is expected to arrive through "from types import *".  When
# that name does not exist, bind it to None.
try:
    UnicodeType
except NameError:
    UnicodeType = None
89
# Pickle opcodes.  See pickletools.py for extensive docs.  The listing
# here is in kind-of alphabetical order of 1-character pickle code.
# pickletools groups them by purpose.

MARK            = '('   # push special markobject on stack
STOP            = '.'   # every pickle ends with STOP
POP             = '0'   # discard topmost stack item
POP_MARK        = '1'   # discard stack top through topmost markobject
DUP             = '2'   # duplicate top stack item
FLOAT           = 'F'   # push float object; decimal string argument
INT             = 'I'   # push integer or bool; decimal string argument
BININT          = 'J'   # push four-byte signed int
BININT1         = 'K'   # push 1-byte unsigned int
LONG            = 'L'   # push long; decimal string argument
BININT2         = 'M'   # push 2-byte unsigned int
NONE            = 'N'   # push None
PERSID          = 'P'   # push persistent object; id is taken from string arg
BINPERSID       = 'Q'   # push persistent object; id is taken from stack
REDUCE          = 'R'   # apply callable to argtuple, both on stack
STRING          = 'S'   # push string; NL-terminated string argument
BINSTRING       = 'T'   # push string; counted binary string argument
SHORT_BINSTRING = 'U'   # push string; counted binary string argument < 256 bytes
UNICODE         = 'V'   # push Unicode string; raw-unicode-escaped argument
BINUNICODE      = 'X'   # push Unicode string; counted UTF-8 string argument
APPEND          = 'a'   # append stack top to list below it
BUILD           = 'b'   # call __setstate__ or __dict__.update()
GLOBAL          = 'c'   # push self.find_class(modname, name); 2 string args
DICT            = 'd'   # build a dict from stack items
EMPTY_DICT      = '}'   # push empty dict
APPENDS         = 'e'   # extend list on stack by topmost stack slice
GET             = 'g'   # push item from memo on stack; index is string arg
BINGET          = 'h'   # push item from memo on stack; index is 1-byte arg
INST            = 'i'   # build & push class instance
LONG_BINGET     = 'j'   # push item from memo on stack; index is 4-byte arg
LIST            = 'l'   # build list from topmost stack items
EMPTY_LIST      = ']'   # push empty list
OBJ             = 'o'   # build & push class instance
PUT             = 'p'   # store stack top in memo; index is string arg
BINPUT          = 'q'   # store stack top in memo; index is 1-byte arg
LONG_BINPUT     = 'r'   # store stack top in memo; index is 4-byte arg
SETITEM         = 's'   # add key+value pair to dict
TUPLE           = 't'   # build tuple from topmost stack items
EMPTY_TUPLE     = ')'   # push empty tuple
SETITEMS        = 'u'   # modify dict by adding topmost key+value pairs
BINFLOAT        = 'G'   # push float; arg is 8-byte float encoding

TRUE            = 'I01\n'  # not an opcode; see INT docs in pickletools.py
FALSE           = 'I00\n'  # not an opcode; see INT docs in pickletools.py

# Protocol 2 (not yet implemented).

PROTO           = '\x80'  # identify pickle protocol
NEWOBJ          = '\x81'  # build object by applying cls.__new__ to argtuple
EXT1            = '\x82'  # push object from extension registry; 1-byte index
EXT2            = '\x83'  # ditto, but 2-byte index
EXT4            = '\x84'  # ditto, but 4-byte index
TUPLE1          = '\x85'  # build 1-tuple from stack top
TUPLE2          = '\x86'  # build 2-tuple from two topmost stack items
TUPLE3          = '\x87'  # build 3-tuple from three topmost stack items
NEWTRUE         = '\x88'  # push True
NEWFALSE        = '\x89'  # push False
LONG1           = '\x8a'  # push long from < 256 bytes
LONG4           = '\x8b'  # push really big long

# Indexed by tuple length (0..3): the opcode that builds a tuple of that
# size directly from the topmost stack items.
_tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3]


# Export every all-uppercase name currently defined at module level (the
# opcode constants above) in addition to the explicit public names.
__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$",x)])
del x
Skip Montanaro23bafc62001-02-18 03:10:09 +0000159
Guido van Rossum1be31752003-01-28 15:19:53 +0000160
161# Pickling machinery
162
Guido van Rossuma48061a1995-01-10 00:31:14 +0000163class Pickler:
164
Guido van Rossumf29d3d62003-01-27 22:47:53 +0000165 def __init__(self, file, proto=1):
Raymond Hettingeraef22fb2002-05-29 16:18:42 +0000166 """This takes a file-like object for writing a pickle data stream.
167
Guido van Rossumf29d3d62003-01-27 22:47:53 +0000168 The optional proto argument tells the pickler to use the given
169 protocol; supported protocols are 0, 1, 2. The default
170 protocol is 1 (in previous Python versions the default was 0).
171
172 Protocol 1 is more efficient than protocol 0; protocol 2 is
173 more efficient than protocol 1. Protocol 2 is not the default
174 because it is not supported by older Python versions.
175
176 XXX Protocol 2 is not yet implemented.
Raymond Hettingeraef22fb2002-05-29 16:18:42 +0000177
178 The file parameter must have a write() method that accepts a single
179 string argument. It can thus be an open file object, a StringIO
180 object, or any other custom object that meets this interface.
181
182 """
Guido van Rossum1be31752003-01-28 15:19:53 +0000183 if proto not in (0, 1, 2):
Guido van Rossumd6c9e632003-01-28 03:49:52 +0000184 raise ValueError, "pickle protocol must be 0, 1 or 2"
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000185 self.write = file.write
186 self.memo = {}
Guido van Rossum1be31752003-01-28 15:19:53 +0000187 self.proto = int(proto)
Guido van Rossumf29d3d62003-01-27 22:47:53 +0000188 self.bin = proto >= 1
Guido van Rossuma48061a1995-01-10 00:31:14 +0000189
Fred Drake7f781c92002-05-01 20:33:53 +0000190 def clear_memo(self):
Raymond Hettingeraef22fb2002-05-29 16:18:42 +0000191 """Clears the pickler's "memo".
192
193 The memo is the data structure that remembers which objects the
Tim Petersb377f8a2003-01-28 00:23:36 +0000194 pickler has already seen, so that shared or recursive objects are
195 pickled by reference and not by value. This method is useful when
196 re-using picklers.
Raymond Hettingeraef22fb2002-05-29 16:18:42 +0000197
198 """
Fred Drake7f781c92002-05-01 20:33:53 +0000199 self.memo.clear()
200
def dump(self, obj):
    """Write the pickled representation of obj to the pickler's file.

    For protocol 2 and higher the stream starts with a PROTO opcode
    followed by the protocol number.  The object's pickle comes next,
    and a STOP opcode ends the stream.

    """
    proto = self.proto
    if proto >= 2:
        header = PROTO + chr(proto)
        self.write(header)
    self.save(obj)
    self.write(STOP)
Guido van Rossuma48061a1995-01-10 00:31:14 +0000212
def memoize(self, obj):
    """Record obj in the memo and write the matching PUT opcode."""

    # self.memo maps id(obj) to a pair (unpickler memo key, obj).  The
    # key is written into the pickle and becomes the key in the
    # Unpickler's memo.  Storing obj itself in the pair keeps transient
    # objects alive for the duration of pickling.

    # Taking the current memo size as the next key is only a
    # convention; all that is required is that the keys be unique.  No
    # other scheme appears to have an advantage, and this one lets the
    # Unpickler memo be a plain (but growable) array indexed by key.
    key = len(self.memo)
    opcode = self.put(key)
    self.write(opcode)
    self.memo[id(obj)] = (key, obj)
Jeremy Hylton3422c992003-01-24 19:29:52 +0000231
# Build the opcode string that stores the stack top under memo key i:
# BINPUT or LONG_BINPUT for binary pickles, PUT for text pickles.
def put(self, i, pack=struct.pack):
    if not self.bin:
        return PUT + repr(i) + '\n'
    if i < 256:
        # The key fits in the one-byte argument of BINPUT.
        return BINPUT + chr(i)
    return LONG_BINPUT + pack("<i", i)
Guido van Rossuma48061a1995-01-10 00:31:14 +0000241
# Build the opcode string that fetches memo key i onto the stack:
# BINGET or LONG_BINGET for binary pickles, GET for text pickles.
def get(self, i, pack=struct.pack):
    if not self.bin:
        return GET + repr(i) + '\n'
    if i < 256:
        # The key fits in the one-byte argument of BINGET.
        return BINGET + chr(i)
    return LONG_BINGET + pack("<i", i)
Tim Peters2344fae2001-01-15 00:50:52 +0000251
def save(self, obj):
    """Pickle obj using the first strategy that applies.

    The strategies are tried in this order:

    1. a persistent id returned by self.persistent_id(obj);
    2. a memo hit (obj was already pickled; emit a GET);
    3. a save_* method registered in self.dispatch for type(obj);
    4. type(obj) being a subclass of TypeType (obj is a class with a
       custom metaclass), which is saved as a global;
    5. a reduction function from copy_reg.dispatch_table, or failing
       that obj.__reduce__().

    A string returned by the reduction means "save as global under
    that name"; otherwise it must be a 2-tuple or 3-tuple
    (func, args[, state]).

    Raises PicklingError when no strategy applies or when the
    reduction returns something malformed.
    """
    # Check for persistent id (defined by a subclass).  Only None means
    # "no persistent id": the default persistent_id() returns None, and
    # falsy ids such as 0 or "" must still be honoured.
    pid = self.persistent_id(obj)
    if pid is not None:
        self.save_pers(pid)
        return

    # Check the memo
    x = self.memo.get(id(obj))
    if x:
        self.write(self.get(x[0]))
        return

    # Check the type dispatch table
    t = type(obj)
    f = self.dispatch.get(t)
    if f:
        f(self, obj) # Call unbound method with explicit self
        return

    # Check for a class with a custom metaclass; treat as regular class
    try:
        issc = issubclass(t, TypeType)
    except TypeError: # t is not a class (old Boost; see SF #502085)
        issc = 0
    if issc:
        self.save_global(obj)
        return

    # Check copy_reg.dispatch_table
    reduce = dispatch_table.get(t)
    if reduce:
        rv = reduce(obj)
    else:
        # Check for __reduce__ method
        reduce = getattr(obj, "__reduce__", None)
        if not reduce:
            raise PicklingError("Can't pickle %r object: %r" %
                                (t.__name__, obj))
        rv = reduce()

    # Check for string returned by reduce(), meaning "save as global"
    if type(rv) is StringType:
        self.save_global(obj, rv)
        return

    # Assert that reduce() returned a tuple
    if type(rv) is not TupleType:
        raise PicklingError("%s must return string or tuple" % reduce)

    # Assert that it returned a 2-tuple or 3-tuple, and unpack it
    l = len(rv)
    if l == 2:
        func, args = rv
        state = None
    elif l == 3:
        func, args, state = rv
    else:
        raise PicklingError("Tuple returned by %s must have "
                            "exactly two or three elements" % reduce)

    # Save the reduce() output and finally memoize the object
    self.save_reduce(func, args, state)
    self.memoize(obj)
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000316
def persistent_id(self, obj):
    """Hook for subclasses to override; this base version returns None
    for every object.
    """
    return None
320
def save_pers(self, pid):
    """Write a reference to the persistent id pid."""
    if not self.bin:
        # Text pickle: the id travels inline as a string argument.
        self.write(PERSID + str(pid) + '\n')
        return
    # Binary pickle: pickle the id itself, then BINPERSID takes it
    # from the stack.
    self.save(pid)
    self.write(BINPERSID)
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000328
def save_reduce(self, func, args, state=None):
    """Pickle the (func, args[, state]) result of a reduction.

    Writes func, args and REDUCE; when state is not None, also writes
    state and BUILD.  Raises PicklingError when args is neither a tuple
    nor None, or when func is not callable.
    """
    # This API is called by some subclasses

    # args must be a tuple; None is tolerated with a warning.
    if args is None:
        # A hack for Jim Fulton's ExtensionClass, now deprecated.
        # See load_reduce()
        warnings.warn("__basicnew__ special case is deprecated",
                      DeprecationWarning)
    elif not isinstance(args, TupleType):
        raise PicklingError(
            "args from reduce() should be a tuple")

    # func must be callable
    if not callable(func):
        raise PicklingError("func from reduce should be callable")

    emit = self.write
    pickle_one = self.save

    pickle_one(func)
    pickle_one(args)
    emit(REDUCE)

    if state is None:
        return
    pickle_one(state)
    emit(BUILD)
357
# Methods below this point are dispatched through the dispatch table

# Maps a type object to the save_* function that pickles instances of that
# type.  save() looks up type(obj) here and calls the entry with an
# explicit self.
dispatch = {}
361
def save_none(self, obj):
    """Pickle None: write the NONE opcode.  obj itself is not examined."""
    emit = self.write
    emit(NONE)
dispatch[NoneType] = save_none
365
def save_bool(self, obj):
    """Pickle a bool.

    Protocol 2 and up use the dedicated NEWTRUE/NEWFALSE opcodes; older
    protocols write the TRUE/FALSE strings instead.
    """
    if self.proto >= 2:
        if obj:
            code = NEWTRUE
        else:
            code = NEWFALSE
    else:
        if obj:
            code = TRUE
        else:
            code = FALSE
    self.write(code)
dispatch[bool] = save_bool
372
def save_int(self, obj, pack=struct.pack):
    """Pickle an int using the most compact opcode available.

    Binary pickles use BININT1 (0..0xff), BININT2 (0..0xffff) or BININT
    (any value that fits a signed 4-byte int).  Text pickles, and ints
    too large for BININT, use the decimal INT opcode.
    """
    emit = self.write
    if self.bin:
        # One- and two-byte unsigned forms come first.
        if 0 <= obj <= 0xff:
            emit(BININT1 + chr(obj))
            return
        if 0 <= obj <= 0xffff:
            low = obj & 0xff
            high = obj >> 8
            emit(BININT2 + chr(low) + chr(high))
            return
        # Python's right shift sign-extends, so when everything above
        # bit 30 is a copy of the sign bit the shift yields 0 or -1,
        # and the value fits in a signed 4-byte int.
        if (obj >> 31) in (0, -1):
            emit(BININT + pack("<i", obj))
            return
    # Text pickle, or int too big to fit in signed 4-byte format.
    emit(INT + repr(obj) + '\n')
dispatch[IntType] = save_int
396
def save_long(self, obj, pack=struct.pack):
    """Pickle a long integer.

    Protocol 2 and up write the byte string produced by encode_long():
    LONG1 with a 1-byte length when it is shorter than 256 bytes, LONG4
    with a 4-byte length otherwise.  Older protocols write the decimal
    LONG opcode.
    """
    if self.proto >= 2:
        encoded = encode_long(obj)
        n = len(encoded)
        if n < 256:
            self.write(LONG1 + chr(n) + encoded)
        else:
            self.write(LONG4 + pack("<i", n) + encoded)
        # The binary form is complete.  Without this return the text
        # LONG opcode below would be written as well, leaving the value
        # in the stream twice.
        return
    self.write(LONG + repr(obj) + '\n')
dispatch[LongType] = save_long
407
def save_float(self, obj, pack=struct.pack):
    """Pickle a float: BINFLOAT with the '>d' struct packing in binary
    pickles, FLOAT with the repr() text otherwise.
    """
    if not self.bin:
        self.write(FLOAT + repr(obj) + '\n')
        return
    self.write(BINFLOAT + pack('>d', obj))
dispatch[FloatType] = save_float
414
def save_string(self, obj, pack=struct.pack):
    """Pickle a string and memoize it.

    Binary pickles write a length-prefixed copy: SHORT_BINSTRING with a
    1-byte length below 256 bytes, BINSTRING with a 4-byte length
    otherwise.  Text pickles write STRING followed by repr(obj).
    """
    emit = self.write
    if not self.bin:
        emit(STRING + repr(obj) + '\n')
    else:
        size = len(obj)
        if size < 256:
            prefix = SHORT_BINSTRING + chr(size)
        else:
            prefix = BINSTRING + pack("<i", size)
        emit(prefix + obj)
    self.memoize(obj)
dispatch[StringType] = save_string
426
def save_unicode(self, obj, pack=struct.pack):
    """Pickle a Unicode string and memoize it.

    Binary pickles write BINUNICODE, a 4-byte length and the UTF-8
    encoding.  Text pickles write UNICODE followed by the
    raw-unicode-escape encoding, with backslashes and newlines escaped
    first so the newline-terminated argument stays on one line.
    """
    if self.bin:
        encoding = obj.encode('utf-8')
        n = len(encoding)
        self.write(BINUNICODE + pack("<i", n) + encoding)
    else:
        # Escape into a separate name.  Rebinding obj here would make
        # memoize() below key the memo on the temporary escaped copy, so
        # later references to the original object would never be found
        # in the memo.
        escaped = obj.replace("\\", "\\u005c")
        escaped = escaped.replace("\n", "\\u000a")
        self.write(UNICODE + escaped.encode('raw-unicode-escape') + '\n')
    self.memoize(obj)
dispatch[UnicodeType] = save_unicode
438
# When the string and Unicode types are one and the same, replace
# save_string with a version that decides per object (via obj.isunicode())
# whether to write string or Unicode opcodes.
if StringType == UnicodeType:
    # This is true for Jython
    def save_string(self, obj, pack=struct.pack):
        unicode = obj.isunicode()

        if self.bin:
            if unicode:
                obj = obj.encode("utf-8")
            l = len(obj)
            # The short one-byte-length form exists only for plain
            # strings; Unicode data always gets the 4-byte length.
            if l < 256 and not unicode:
                self.write(SHORT_BINSTRING + chr(l) + obj)
            else:
                s = pack("<i", l)
                if unicode:
                    self.write(BINUNICODE + s + obj)
                else:
                    self.write(BINSTRING + s + obj)
        else:
            if unicode:
                # Escape backslashes and newlines so the encoded text
                # stays on a single line.
                obj = obj.replace("\\", "\\u005c")
                obj = obj.replace("\n", "\\u000a")
                obj = obj.encode('raw-unicode-escape')
                self.write(UNICODE + obj + '\n')
            else:
                self.write(STRING + `obj` + '\n')
        # NOTE(review): in the Unicode branches obj has been rebound to
        # the encoded/escaped copy by this point, so the memo entry looks
        # like it is keyed on that copy rather than on the object passed
        # in -- confirm whether this is intended.
        self.memoize(obj)
    dispatch[StringType] = save_string
Tim Peters658cba62001-02-09 20:06:00 +0000466
def save_tuple(self, obj):
    # Pickle a tuple.  Depending on protocol and length this writes
    # EMPTY_TUPLE, one of the fixed-size opcodes from _tuplesize2code, or
    # MARK ... TUPLE.  If saving the elements ends up memoizing the tuple
    # itself (a recursive tuple), the elements just pushed are popped
    # again and the tuple is fetched from the memo instead.
    write = self.write
    proto = self.proto

    n = len(obj)
    if n == 0 and proto:
        # EMPTY_TUPLE is only used for protocol 1 and up; the empty
        # tuple is not memoized.
        write(EMPTY_TUPLE)
        return

    save = self.save
    memo = self.memo
    if n <= 3 and proto >= 2:
        for element in obj:
            save(element)
        # Subtle.  Same as in the big comment below.
        if id(obj) in memo:
            # No MARK was written on this path, so discard the n
            # elements one by one before the GET.
            get = self.get(memo[id(obj)][0])
            write(POP * n + get)
        else:
            write(_tuplesize2code[n])
            self.memoize(obj)
        return

    # proto 0, or proto 1 and tuple isn't empty, or proto > 1 and tuple
    # has more than 3 elements.
    write(MARK)
    for element in obj:
        save(element)

    if n and id(obj) in memo:
        # Subtle.  obj was not in memo when we entered save_tuple(), so
        # the process of saving the tuple's elements must have saved
        # the tuple itself:  the tuple is recursive.  The proper action
        # now is to throw away everything we put on the stack, and
        # simply GET the tuple (it's already constructed).  This check
        # could have been done in the "for element" loop instead, but
        # recursive tuples are a rare thing.
        get = self.get(memo[id(obj)][0])
        if proto:
            write(POP_MARK + get)
        else:   # proto 0 -- POP_MARK not available
            # n elements plus the MARK itself must be popped.
            write(POP * (n+1) + get)
        return

    # No recursion (including the empty-tuple case for protocol 0).
    self.write(TUPLE)
    if obj:                      # No need to memoize empty tuple
        self.memoize(obj)

dispatch[TupleType] = save_tuple
517
# Nothing in Python 2.3 uses save_empty_tuple().  However, a Pickler
# subclass in Zope3 was found to call it, so removing it would not be
# harmless.
def save_empty_tuple(self, obj):
    """Write the EMPTY_TUPLE opcode; obj itself is not examined."""
    emit = self.write
    emit(EMPTY_TUPLE)
523
def save_list(self, obj):
    """Pickle a list.

    The empty list is written and memoized before any element is saved.
    Binary pickles then add the elements with a single APPEND (one
    element) or with MARK ... APPENDS (several); text pickles APPEND
    each element individually.
    """
    emit = self.write
    pickle_one = self.save

    if not self.bin:
        # proto 0 -- can't use EMPTY_LIST or APPENDS
        emit(MARK + LIST)
        self.memoize(obj)
        for item in obj:
            pickle_one(item)
            emit(APPEND)
    else:
        emit(EMPTY_LIST)
        self.memoize(obj)
        count = len(obj)
        if count == 1:
            pickle_one(obj[0])
            emit(APPEND)
        elif count > 1:
            emit(MARK)
            for item in obj:
                pickle_one(item)
            emit(APPENDS)
        # an empty list needs nothing beyond EMPTY_LIST

dispatch[ListType] = save_list
551
def save_dict(self, obj):
    """Pickle a dictionary.

    The empty dict is written and memoized before any item is saved.
    Binary pickles of dicts with more than one item add them all with
    MARK ... SETITEMS; every other case issues SETITEM after each
    key/value pair.
    """
    emit = self.write
    pickle_one = self.save
    pairs = obj.iteritems()

    if self.bin:
        emit(EMPTY_DICT)
        self.memoize(obj)
        batch = len(obj) > 1
    else:
        # proto 0 -- can't use EMPTY_DICT or SETITEMS
        emit(MARK + DICT)
        self.memoize(obj)
        batch = False

    if batch:
        emit(MARK)
        for k, v in pairs:
            pickle_one(k)
            pickle_one(v)
        emit(SETITEMS)
    else:
        # proto 0, or a binary pickle of an empty or one-item dict
        for k, v in pairs:
            pickle_one(k)
            pickle_one(v)
            emit(SETITEM)

dispatch[DictionaryType] = save_dict
if PyStringMap is not None:
    dispatch[PyStringMap] = save_dict
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000582
def save_inst(self, obj):
    """Pickle a class instance.

    Writes MARK, the class and the __getinitargs__() results (OBJ in
    binary pickles, INST with module and class name in text pickles),
    memoizes the instance, then writes its state followed by BUILD.
    The state is obj.__getstate__() when that attribute exists,
    otherwise obj.__dict__.
    """
    klass = obj.__class__

    memo = self.memo
    emit = self.write
    pickle_one = self.save
    binary = self.bin

    if hasattr(obj, '__getinitargs__'):
        init_args = obj.__getinitargs__()
        len(init_args) # XXX Assert it's a sequence
        _keep_alive(init_args, memo)
    else:
        init_args = ()

    emit(MARK)

    # Binary pickles put the class on the stack ahead of the arguments;
    # text pickles name it inside the INST opcode after them.
    if binary:
        pickle_one(klass)
    for arg in init_args:
        pickle_one(arg)
    if binary:
        emit(OBJ)
    else:
        emit(INST + klass.__module__ + '\n' + klass.__name__ + '\n')

    self.memoize(obj)

    try:
        getstate = obj.__getstate__
    except AttributeError:
        state = obj.__dict__
    else:
        state = getstate()
        _keep_alive(state, memo)
    pickle_one(state)
    emit(BUILD)

dispatch[InstanceType] = save_inst
622
Guido van Rossum3a41c612003-01-28 15:10:22 +0000623 def save_global(self, obj, name = None):
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000624 write = self.write
625 memo = self.memo
626
Tim Petersc32d8242001-04-10 02:48:53 +0000627 if name is None:
Guido van Rossum3a41c612003-01-28 15:10:22 +0000628 name = obj.__name__
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000629
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000630 try:
Guido van Rossum3a41c612003-01-28 15:10:22 +0000631 module = obj.__module__
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000632 except AttributeError:
Guido van Rossum3a41c612003-01-28 15:10:22 +0000633 module = whichmodule(obj, name)
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000634
Guido van Rossumb0a98e92001-08-17 18:49:52 +0000635 try:
636 __import__(module)
637 mod = sys.modules[module]
638 klass = getattr(mod, name)
639 except (ImportError, KeyError, AttributeError):
640 raise PicklingError(
641 "Can't pickle %r: it's not found as %s.%s" %
Guido van Rossum3a41c612003-01-28 15:10:22 +0000642 (obj, module, name))
Guido van Rossumb0a98e92001-08-17 18:49:52 +0000643 else:
Guido van Rossum3a41c612003-01-28 15:10:22 +0000644 if klass is not obj:
Guido van Rossumb0a98e92001-08-17 18:49:52 +0000645 raise PicklingError(
646 "Can't pickle %r: it's not the same object as %s.%s" %
Guido van Rossum3a41c612003-01-28 15:10:22 +0000647 (obj, module, name))
Guido van Rossumb0a98e92001-08-17 18:49:52 +0000648
Tim Peters518df0d2003-01-28 01:00:38 +0000649 write(GLOBAL + module + '\n' + name + '\n')
Guido van Rossum3a41c612003-01-28 15:10:22 +0000650 self.memoize(obj)
Tim Peters3b769832003-01-28 03:51:36 +0000651
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000652 dispatch[ClassType] = save_global
653 dispatch[FunctionType] = save_global
654 dispatch[BuiltinFunctionType] = save_global
Tim Peters6d6c1a32001-08-02 04:15:00 +0000655 dispatch[TypeType] = save_global
Guido van Rossum0c891ce1995-03-14 15:09:05 +0000656
Guido van Rossum1be31752003-01-28 15:19:53 +0000657# Pickling helpers
Guido van Rossuma48061a1995-01-10 00:31:14 +0000658
Guido van Rossum5ed5c4c1997-09-03 00:23:54 +0000659def _keep_alive(x, memo):
660 """Keeps a reference to the object x in the memo.
661
662 Because we remember objects by their id, we have
663 to assure that possibly temporary objects are kept
664 alive by referencing them.
665 We store a reference at the id of the memo, which should
666 normally not be used unless someone tries to deepcopy
667 the memo itself...
668 """
669 try:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000670 memo[id(memo)].append(x)
Guido van Rossum5ed5c4c1997-09-03 00:23:54 +0000671 except KeyError:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000672 # aha, this is the first one :-)
673 memo[id(memo)]=[x]
Guido van Rossum5ed5c4c1997-09-03 00:23:54 +0000674
675
classmap = {} # called classmap for backwards compatibility

def whichmodule(func, funcname):
    """Figure out the module in which a function occurs.

    Every entry of sys.modules except '__main__' (and None placeholders)
    is checked for an attribute funcname that is the very object func.
    The answer is cached in classmap.
    Return a module name; '__main__' if no module matches.
    """
    try:
        return classmap[func]
    except KeyError:
        pass

    found = '__main__'
    for modname, module in sys.modules.items():
        if module is None:
            continue # skip dummy package entries
        if modname == '__main__':
            continue
        if hasattr(module, funcname) and getattr(module, funcname) is func:
            found = modname
            break
    classmap[func] = found
    return found
Guido van Rossuma48061a1995-01-10 00:31:14 +0000700
701
Guido van Rossum1be31752003-01-28 15:19:53 +0000702# Unpickling machinery
703
Guido van Rossuma48061a1995-01-10 00:31:14 +0000704class Unpickler:
705
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000706 def __init__(self, file):
Raymond Hettingeraef22fb2002-05-29 16:18:42 +0000707 """This takes a file-like object for reading a pickle data stream.
708
709 This class automatically determines whether the data stream was
710 written in binary mode or not, so it does not need a flag as in
711 the Pickler class factory.
712
713 The file-like object must have two methods, a read() method that
714 takes an integer argument, and a readline() method that requires no
715 arguments. Both methods should return a string. Thus file-like
716 object can be a file object opened for reading, a StringIO object,
717 or any other custom object that meets this interface.
Raymond Hettingeraef22fb2002-05-29 16:18:42 +0000718 """
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000719 self.readline = file.readline
720 self.read = file.read
721 self.memo = {}
Guido van Rossuma48061a1995-01-10 00:31:14 +0000722
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000723 def load(self):
Guido van Rossum3a41c612003-01-28 15:10:22 +0000724 """Read a pickled object representation from the open file.
Raymond Hettingeraef22fb2002-05-29 16:18:42 +0000725
Guido van Rossum3a41c612003-01-28 15:10:22 +0000726 Return the reconstituted object hierarchy specified in the file.
Raymond Hettingeraef22fb2002-05-29 16:18:42 +0000727 """
Jeremy Hylton20747fa2001-11-09 16:15:04 +0000728 self.mark = object() # any new unique object
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000729 self.stack = []
730 self.append = self.stack.append
731 read = self.read
732 dispatch = self.dispatch
733 try:
734 while 1:
735 key = read(1)
736 dispatch[key](self)
Guido van Rossumff871742000-12-13 18:11:56 +0000737 except _Stop, stopinst:
738 return stopinst.value
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000739
Tim Petersc23d18a2003-01-28 01:41:51 +0000740 # Return largest index k such that self.stack[k] is self.mark.
741 # If the stack doesn't contain a mark, eventually raises IndexError.
742 # This could be sped by maintaining another stack, of indices at which
743 # the mark appears. For that matter, the latter stack would suffice,
744 # and we wouldn't need to push mark objects on self.stack at all.
745 # Doing so is probably a good thing, though, since if the pickle is
746 # corrupt (or hostile) we may get a clue from finding self.mark embedded
747 # in unpickled objects.
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000748 def marker(self):
749 stack = self.stack
750 mark = self.mark
751 k = len(stack)-1
752 while stack[k] is not mark: k = k-1
753 return k
754
755 dispatch = {}
756
757 def load_eof(self):
758 raise EOFError
759 dispatch[''] = load_eof
760
Guido van Rossumd6c9e632003-01-28 03:49:52 +0000761 def load_proto(self):
762 proto = ord(self.read(1))
763 if not 0 <= proto <= 2:
764 raise ValueError, "unsupported pickle protocol: %d" % proto
765 dispatch[PROTO] = load_proto
766
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000767 def load_persid(self):
768 pid = self.readline()[:-1]
769 self.append(self.persistent_load(pid))
770 dispatch[PERSID] = load_persid
771
772 def load_binpersid(self):
Raymond Hettinger46ac8eb2002-06-30 03:39:14 +0000773 pid = self.stack.pop()
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000774 self.append(self.persistent_load(pid))
775 dispatch[BINPERSID] = load_binpersid
776
777 def load_none(self):
778 self.append(None)
779 dispatch[NONE] = load_none
780
Guido van Rossum7d97d312003-01-28 04:25:27 +0000781 def load_false(self):
782 self.append(False)
783 dispatch[NEWFALSE] = load_false
784
785 def load_true(self):
786 self.append(True)
787 dispatch[NEWTRUE] = load_true
788
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000789 def load_int(self):
Tim Peters19ef62d2001-08-28 22:21:18 +0000790 data = self.readline()
Guido van Rossume2763392002-04-05 19:30:08 +0000791 if data == FALSE[1:]:
792 val = False
793 elif data == TRUE[1:]:
794 val = True
795 else:
796 try:
797 val = int(data)
798 except ValueError:
799 val = long(data)
800 self.append(val)
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000801 dispatch[INT] = load_int
802
803 def load_binint(self):
804 self.append(mloads('i' + self.read(4)))
805 dispatch[BININT] = load_binint
806
807 def load_binint1(self):
Tim Petersbbf63cd2003-01-27 21:15:36 +0000808 self.append(ord(self.read(1)))
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000809 dispatch[BININT1] = load_binint1
810
811 def load_binint2(self):
812 self.append(mloads('i' + self.read(2) + '\000\000'))
813 dispatch[BININT2] = load_binint2
Tim Peters2344fae2001-01-15 00:50:52 +0000814
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000815 def load_long(self):
Guido van Rossumff871742000-12-13 18:11:56 +0000816 self.append(long(self.readline()[:-1], 0))
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000817 dispatch[LONG] = load_long
818
Guido van Rossumd6c9e632003-01-28 03:49:52 +0000819 def load_long1(self):
820 n = ord(self.read(1))
821 bytes = self.read(n)
822 return decode_long(bytes)
823 dispatch[LONG1] = load_long1
824
825 def load_long4(self):
826 n = mloads('i' + self.read(4))
827 bytes = self.read(n)
828 return decode_long(bytes)
829 dispatch[LONG4] = load_long4
830
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000831 def load_float(self):
Guido van Rossumff871742000-12-13 18:11:56 +0000832 self.append(float(self.readline()[:-1]))
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000833 dispatch[FLOAT] = load_float
834
Guido van Rossumd3703791998-10-22 20:15:36 +0000835 def load_binfloat(self, unpack=struct.unpack):
836 self.append(unpack('>d', self.read(8))[0])
837 dispatch[BINFLOAT] = load_binfloat
838
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000839 def load_string(self):
Jeremy Hyltonbe467e52000-09-15 15:14:51 +0000840 rep = self.readline()[:-1]
Tim Petersad5a7712003-01-28 16:23:33 +0000841 for q in "\"'": # double or single quote
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000842 if rep.startswith(q):
843 if not rep.endswith(q):
844 raise ValueError, "insecure string pickle"
845 rep = rep[len(q):-len(q)]
846 break
847 else:
Jeremy Hyltonbe467e52000-09-15 15:14:51 +0000848 raise ValueError, "insecure string pickle"
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000849 self.append(rep.decode("string-escape"))
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000850 dispatch[STRING] = load_string
851
852 def load_binstring(self):
853 len = mloads('i' + self.read(4))
854 self.append(self.read(len))
855 dispatch[BINSTRING] = load_binstring
856
Guido van Rossumb5f2f1b2000-03-10 23:20:09 +0000857 def load_unicode(self):
858 self.append(unicode(self.readline()[:-1],'raw-unicode-escape'))
859 dispatch[UNICODE] = load_unicode
860
861 def load_binunicode(self):
862 len = mloads('i' + self.read(4))
863 self.append(unicode(self.read(len),'utf-8'))
864 dispatch[BINUNICODE] = load_binunicode
865
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000866 def load_short_binstring(self):
Tim Petersbbf63cd2003-01-27 21:15:36 +0000867 len = ord(self.read(1))
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000868 self.append(self.read(len))
869 dispatch[SHORT_BINSTRING] = load_short_binstring
870
871 def load_tuple(self):
872 k = self.marker()
873 self.stack[k:] = [tuple(self.stack[k+1:])]
874 dispatch[TUPLE] = load_tuple
875
876 def load_empty_tuple(self):
877 self.stack.append(())
878 dispatch[EMPTY_TUPLE] = load_empty_tuple
879
Guido van Rossum44f0ea52003-01-28 04:14:51 +0000880 def load_tuple1(self):
881 self.stack[-1] = (self.stack[-1],)
882 dispatch[TUPLE1] = load_tuple1
883
884 def load_tuple2(self):
885 self.stack[-2:] = [(self.stack[-2], self.stack[-1])]
886 dispatch[TUPLE2] = load_tuple2
887
888 def load_tuple3(self):
889 self.stack[-3:] = [(self.stack[-3], self.stack[-2], self.stack[-1])]
890 dispatch[TUPLE3] = load_tuple3
891
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000892 def load_empty_list(self):
893 self.stack.append([])
894 dispatch[EMPTY_LIST] = load_empty_list
895
896 def load_empty_dictionary(self):
897 self.stack.append({})
898 dispatch[EMPTY_DICT] = load_empty_dictionary
899
900 def load_list(self):
901 k = self.marker()
902 self.stack[k:] = [self.stack[k+1:]]
903 dispatch[LIST] = load_list
904
905 def load_dict(self):
906 k = self.marker()
907 d = {}
908 items = self.stack[k+1:]
909 for i in range(0, len(items), 2):
910 key = items[i]
911 value = items[i+1]
912 d[key] = value
913 self.stack[k:] = [d]
914 dispatch[DICT] = load_dict
915
916 def load_inst(self):
917 k = self.marker()
918 args = tuple(self.stack[k+1:])
919 del self.stack[k:]
920 module = self.readline()[:-1]
921 name = self.readline()[:-1]
922 klass = self.find_class(module, name)
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000923 instantiated = 0
924 if (not args and type(klass) is ClassType and
925 not hasattr(klass, "__getinitargs__")):
926 try:
927 value = _EmptyClass()
928 value.__class__ = klass
Guido van Rossumb19e2a31998-04-13 18:08:45 +0000929 instantiated = 1
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000930 except RuntimeError:
931 # In restricted execution, assignment to inst.__class__ is
932 # prohibited
933 pass
934 if not instantiated:
Guido van Rossum743d17e1998-09-15 20:25:57 +0000935 try:
Barry Warsawbf4d9592001-11-15 23:42:58 +0000936 if not hasattr(klass, '__safe_for_unpickling__'):
937 raise UnpicklingError('%s is not safe for unpickling' %
938 klass)
Guido van Rossum743d17e1998-09-15 20:25:57 +0000939 value = apply(klass, args)
940 except TypeError, err:
941 raise TypeError, "in constructor for %s: %s" % (
942 klass.__name__, str(err)), sys.exc_info()[2]
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000943 self.append(value)
944 dispatch[INST] = load_inst
945
946 def load_obj(self):
947 stack = self.stack
948 k = self.marker()
949 klass = stack[k + 1]
950 del stack[k + 1]
Tim Peters2344fae2001-01-15 00:50:52 +0000951 args = tuple(stack[k + 1:])
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000952 del stack[k:]
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000953 instantiated = 0
954 if (not args and type(klass) is ClassType and
955 not hasattr(klass, "__getinitargs__")):
956 try:
957 value = _EmptyClass()
958 value.__class__ = klass
959 instantiated = 1
960 except RuntimeError:
961 # In restricted execution, assignment to inst.__class__ is
962 # prohibited
963 pass
964 if not instantiated:
965 value = apply(klass, args)
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000966 self.append(value)
Tim Peters2344fae2001-01-15 00:50:52 +0000967 dispatch[OBJ] = load_obj
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000968
Guido van Rossum3a41c612003-01-28 15:10:22 +0000969 def load_newobj(self):
970 args = self.stack.pop()
971 cls = self.stack[-1]
972 obj = cls.__new__(cls, *args)
973 self.stack[-1:] = obj
974 dispatch[NEWOBJ] = load_newobj
975
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000976 def load_global(self):
977 module = self.readline()[:-1]
978 name = self.readline()[:-1]
979 klass = self.find_class(module, name)
980 self.append(klass)
981 dispatch[GLOBAL] = load_global
982
983 def find_class(self, module, name):
Barry Warsawbf4d9592001-11-15 23:42:58 +0000984 __import__(module)
985 mod = sys.modules[module]
986 klass = getattr(mod, name)
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000987 return klass
988
989 def load_reduce(self):
990 stack = self.stack
991
992 callable = stack[-2]
993 arg_tup = stack[-1]
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000994 del stack[-2:]
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000995
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000996 if type(callable) is not ClassType:
Raymond Hettinger54f02222002-06-01 14:18:47 +0000997 if not callable in safe_constructors:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000998 try:
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000999 safe = callable.__safe_for_unpickling__
1000 except AttributeError:
1001 safe = None
Guido van Rossuma48061a1995-01-10 00:31:14 +00001002
Tim Petersc32d8242001-04-10 02:48:53 +00001003 if not safe:
Tim Peters2344fae2001-01-15 00:50:52 +00001004 raise UnpicklingError, "%s is not safe for " \
1005 "unpickling" % callable
Guido van Rossuma48061a1995-01-10 00:31:14 +00001006
Guido van Rossum45e2fbc1998-03-26 21:13:24 +00001007 if arg_tup is None:
Guido van Rossumbc64e222003-01-28 16:34:19 +00001008 # A hack for Jim Fulton's ExtensionClass, now deprecated
1009 warnings.warn("__basicnew__ special case is deprecated",
Tim Peters8ac14952002-05-23 15:15:30 +00001010 DeprecationWarning)
Guido van Rossum45e2fbc1998-03-26 21:13:24 +00001011 value = callable.__basicnew__()
1012 else:
1013 value = apply(callable, arg_tup)
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001014 self.append(value)
1015 dispatch[REDUCE] = load_reduce
Guido van Rossuma48061a1995-01-10 00:31:14 +00001016
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001017 def load_pop(self):
1018 del self.stack[-1]
1019 dispatch[POP] = load_pop
Guido van Rossum7b5430f1995-03-04 22:25:21 +00001020
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001021 def load_pop_mark(self):
1022 k = self.marker()
Guido van Rossum45e2fbc1998-03-26 21:13:24 +00001023 del self.stack[k:]
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001024 dispatch[POP_MARK] = load_pop_mark
Guido van Rossuma48061a1995-01-10 00:31:14 +00001025
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001026 def load_dup(self):
Guido van Rossumb1062fc1998-03-31 17:00:46 +00001027 self.append(self.stack[-1])
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001028 dispatch[DUP] = load_dup
Guido van Rossuma48061a1995-01-10 00:31:14 +00001029
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001030 def load_get(self):
1031 self.append(self.memo[self.readline()[:-1]])
1032 dispatch[GET] = load_get
Guido van Rossum78536471996-04-12 13:36:27 +00001033
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001034 def load_binget(self):
Tim Petersbbf63cd2003-01-27 21:15:36 +00001035 i = ord(self.read(1))
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001036 self.append(self.memo[`i`])
1037 dispatch[BINGET] = load_binget
Guido van Rossum78536471996-04-12 13:36:27 +00001038
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001039 def load_long_binget(self):
1040 i = mloads('i' + self.read(4))
1041 self.append(self.memo[`i`])
1042 dispatch[LONG_BINGET] = load_long_binget
Guido van Rossum78536471996-04-12 13:36:27 +00001043
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001044 def load_put(self):
1045 self.memo[self.readline()[:-1]] = self.stack[-1]
1046 dispatch[PUT] = load_put
Guido van Rossuma48061a1995-01-10 00:31:14 +00001047
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001048 def load_binput(self):
Tim Petersbbf63cd2003-01-27 21:15:36 +00001049 i = ord(self.read(1))
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001050 self.memo[`i`] = self.stack[-1]
1051 dispatch[BINPUT] = load_binput
Guido van Rossuma48061a1995-01-10 00:31:14 +00001052
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001053 def load_long_binput(self):
1054 i = mloads('i' + self.read(4))
1055 self.memo[`i`] = self.stack[-1]
1056 dispatch[LONG_BINPUT] = load_long_binput
Guido van Rossuma48061a1995-01-10 00:31:14 +00001057
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001058 def load_append(self):
1059 stack = self.stack
Raymond Hettinger46ac8eb2002-06-30 03:39:14 +00001060 value = stack.pop()
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001061 list = stack[-1]
1062 list.append(value)
1063 dispatch[APPEND] = load_append
Guido van Rossuma48061a1995-01-10 00:31:14 +00001064
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001065 def load_appends(self):
1066 stack = self.stack
1067 mark = self.marker()
1068 list = stack[mark - 1]
Tim Peters209ad952003-01-28 01:44:45 +00001069 list.extend(stack[mark + 1:])
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001070 del stack[mark:]
1071 dispatch[APPENDS] = load_appends
Tim Peters2344fae2001-01-15 00:50:52 +00001072
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001073 def load_setitem(self):
1074 stack = self.stack
Raymond Hettinger46ac8eb2002-06-30 03:39:14 +00001075 value = stack.pop()
1076 key = stack.pop()
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001077 dict = stack[-1]
1078 dict[key] = value
1079 dispatch[SETITEM] = load_setitem
Guido van Rossum0c891ce1995-03-14 15:09:05 +00001080
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001081 def load_setitems(self):
1082 stack = self.stack
1083 mark = self.marker()
1084 dict = stack[mark - 1]
Guido van Rossum45e2fbc1998-03-26 21:13:24 +00001085 for i in range(mark + 1, len(stack), 2):
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001086 dict[stack[i]] = stack[i + 1]
Guido van Rossuma48061a1995-01-10 00:31:14 +00001087
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001088 del stack[mark:]
1089 dispatch[SETITEMS] = load_setitems
Guido van Rossuma48061a1995-01-10 00:31:14 +00001090
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001091 def load_build(self):
1092 stack = self.stack
Raymond Hettinger46ac8eb2002-06-30 03:39:14 +00001093 value = stack.pop()
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001094 inst = stack[-1]
1095 try:
1096 setstate = inst.__setstate__
1097 except AttributeError:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +00001098 try:
1099 inst.__dict__.update(value)
1100 except RuntimeError:
1101 # XXX In restricted execution, the instance's __dict__ is not
1102 # accessible. Use the old way of unpickling the instance
1103 # variables. This is a semantic different when unpickling in
1104 # restricted vs. unrestricted modes.
1105 for k, v in value.items():
1106 setattr(inst, k, v)
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001107 else:
1108 setstate(value)
1109 dispatch[BUILD] = load_build
Guido van Rossuma48061a1995-01-10 00:31:14 +00001110
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001111 def load_mark(self):
1112 self.append(self.mark)
1113 dispatch[MARK] = load_mark
Guido van Rossuma48061a1995-01-10 00:31:14 +00001114
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001115 def load_stop(self):
Raymond Hettinger46ac8eb2002-06-30 03:39:14 +00001116 value = self.stack.pop()
Guido van Rossumff871742000-12-13 18:11:56 +00001117 raise _Stop(value)
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001118 dispatch[STOP] = load_stop
Guido van Rossuma48061a1995-01-10 00:31:14 +00001119
Guido van Rossume467be61997-12-05 19:42:42 +00001120# Helper class for load_inst/load_obj
1121
1122class _EmptyClass:
1123 pass
Guido van Rossuma48061a1995-01-10 00:31:14 +00001124
Guido van Rossumd6c9e632003-01-28 03:49:52 +00001125# Encode/decode longs.
1126
def encode_long(x):
    r"""Encode a long to a two's complement little-endian binary string.
    >>> encode_long(255L)
    '\xff\x00'
    >>> encode_long(32767L)
    '\xff\x7f'
    >>> encode_long(-256L)
    '\x00\xff'
    >>> encode_long(-32768L)
    '\x00\x80'
    >>> encode_long(-128L)
    '\x80'
    >>> encode_long(127L)
    '\x7f'
    """
    octets = []
    # Peel off low-order bytes until the remainder fits a signed byte.
    while x < -128 or x >= 128:
        octets.append(chr(x & 0xff))
        x >>= 8
    # The final byte carries the sign.
    octets.append(chr(x & 0xff))
    return "".join(octets)
1149
1150def decode_long(data):
1151 r"""Decode a long from a two's complement little-endian binary string.
1152 >>> decode_long("\xff\x00")
1153 255L
1154 >>> decode_long("\xff\x7f")
1155 32767L
1156 >>> decode_long("\x00\xff")
1157 -256L
1158 >>> decode_long("\x00\x80")
1159 -32768L
1160 >>> decode_long("\x80")
1161 -128L
1162 >>> decode_long("\x7f")
1163 127L
1164 """
1165 x = 0L
1166 i = 0L
1167 for c in data:
1168 x |= long(ord(c)) << i
1169 i += 8L
1170 if data and ord(c) >= 0x80:
1171 x -= 1L << i
1172 return x
1173
Guido van Rossum0c891ce1995-03-14 15:09:05 +00001174# Shorthands
1175
Jeremy Hyltonabe2c622001-10-15 21:29:28 +00001176try:
1177 from cStringIO import StringIO
1178except ImportError:
1179 from StringIO import StringIO
Guido van Rossumc7c5e691996-07-22 22:26:07 +00001180
def dump(obj, file, proto=1):
    """Write obj to file through a Pickler created with protocol proto."""
    pickler = Pickler(file, proto)
    pickler.dump(obj)
Guido van Rossum0c891ce1995-03-14 15:09:05 +00001183
def dumps(obj, proto=1):
    """Return as a string what a Pickler with protocol proto writes for obj."""
    buf = StringIO()
    pickler = Pickler(buf, proto)
    pickler.dump(obj)
    return buf.getvalue()
Guido van Rossum0c891ce1995-03-14 15:09:05 +00001188
def load(file):
    """Return the object an Unpickler reads from file."""
    unpickler = Unpickler(file)
    return unpickler.load()
Guido van Rossum0c891ce1995-03-14 15:09:05 +00001191
def loads(str):
    """Return the object an Unpickler reads from the string str."""
    return Unpickler(StringIO(str)).load()
Guido van Rossumd6c9e632003-01-28 03:49:52 +00001195
1196# Doctest
1197
def _test():
    """Run doctest.testmod() and return its result."""
    from doctest import testmod
    return testmod()

if __name__ == "__main__":
    _test()