blob: da7bdb6f361bdfdb89ebaa0400247f68f8b5a9da [file] [log] [blame]
Guido van Rossum54f22ed2000-02-04 15:10:34 +00001"""Create portable serialized representations of Python objects.
Guido van Rossuma48061a1995-01-10 00:31:14 +00002
Guido van Rossume467be61997-12-05 19:42:42 +00003See module cPickle for a (much) faster implementation.
4See module copy_reg for a mechanism for registering custom picklers.
Tim Peters22a449a2003-01-27 20:16:36 +00005See module pickletools source for extensive comments.
Guido van Rossuma48061a1995-01-10 00:31:14 +00006
Guido van Rossume467be61997-12-05 19:42:42 +00007Classes:
Guido van Rossuma48061a1995-01-10 00:31:14 +00008
Guido van Rossume467be61997-12-05 19:42:42 +00009 Pickler
10 Unpickler
Guido van Rossuma48061a1995-01-10 00:31:14 +000011
Guido van Rossume467be61997-12-05 19:42:42 +000012Functions:
Guido van Rossuma48061a1995-01-10 00:31:14 +000013
Guido van Rossume467be61997-12-05 19:42:42 +000014 dump(object, file)
15 dumps(object) -> string
16 load(file) -> object
17 loads(string) -> object
Guido van Rossuma48061a1995-01-10 00:31:14 +000018
Guido van Rossume467be61997-12-05 19:42:42 +000019Misc variables:
Guido van Rossuma48061a1995-01-10 00:31:14 +000020
Fred Drakefe82acc1998-02-13 03:24:48 +000021 __version__
Guido van Rossume467be61997-12-05 19:42:42 +000022 format_version
23 compatible_formats
Guido van Rossuma48061a1995-01-10 00:31:14 +000024
Guido van Rossuma48061a1995-01-10 00:31:14 +000025"""
26
Guido van Rossum743d17e1998-09-15 20:25:57 +000027__version__ = "$Revision$" # Code version
Guido van Rossuma48061a1995-01-10 00:31:14 +000028
29from types import *
Guido van Rossumb26a97a2003-01-28 22:29:13 +000030from copy_reg import dispatch_table, _reconstructor
Guido van Rossum255f3ee2003-01-29 06:14:11 +000031from copy_reg import extension_registry, inverted_registry, extension_cache
Guido van Rossumd3703791998-10-22 20:15:36 +000032import marshal
33import sys
34import struct
Skip Montanaro23bafc62001-02-18 03:10:09 +000035import re
Guido van Rossumbc64e222003-01-28 16:34:19 +000036import warnings
Guido van Rossuma48061a1995-01-10 00:31:14 +000037
# Names exported by "from pickle import *".  The all-caps opcode constants
# are appended to this list further down in the module.
__all__ = [
    "PickleError",
    "PicklingError",
    "UnpicklingError",
    "Pickler",
    "Unpickler",
    "dump",
    "dumps",
    "load",
    "loads",
]
40
Tim Petersc0c12b52003-01-29 00:56:17 +000041# These are purely informational; no code uses these.
# File format version we write
format_version = "2.0"

# Old format versions we can read
compatible_formats = [
    "1.0",  # Original protocol 0
    "1.1",  # Protocol 0 with INST added
    "1.2",  # Original protocol 1
    "1.3",  # Protocol 1 with BINFLOAT added
    "2.0",  # Protocol 2
]
Guido van Rossumb72cf2d1997-04-09 17:32:51 +000049
Guido van Rossume0b90422003-01-28 03:17:21 +000050# Why use struct.pack() for pickling but marshal.loads() for
Tim Petersc0c12b52003-01-29 00:56:17 +000051# unpickling? struct.pack() is 40% faster than marshal.dumps(), but
Guido van Rossume0b90422003-01-28 03:17:21 +000052# marshal.loads() is twice as fast as struct.unpack()!
Guido van Rossumb72cf2d1997-04-09 17:32:51 +000053mloads = marshal.loads
Guido van Rossum0c891ce1995-03-14 15:09:05 +000054
class PickleError(Exception):
    """Root of this module's exception hierarchy.

    PicklingError and UnpicklingError both derive from this class, so a
    handler for PickleError catches either of them.
    """
58
class PicklingError(PickleError):
    """Raised when an object that cannot be pickled is passed to dump()."""
65
class UnpicklingError(PickleError):
    """Raised when unpickling an object fails, e.g. on a security violation.

    Unpickling may raise other exceptions as well, including (but not
    necessarily limited to) AttributeError, EOFError, ImportError and
    IndexError.
    """
Guido van Rossum7849da81995-03-09 14:08:35 +000076
Tim Petersc0c12b52003-01-29 00:56:17 +000077# An instance of _Stop is raised by Unpickler.load_stop() in response to
78# the STOP opcode, passing the object that is the result of unpickling.
class _Stop(Exception):
    """Internal exception that carries the finished unpickling result.

    The constructor only records its argument on the ``value`` attribute;
    it does not call Exception.__init__().
    """

    def __init__(self, value):
        # The payload is the only state kept on the instance.
        self.value = value
82
Guido van Rossum533dbcf2003-01-28 17:55:05 +000083# Jython has PyStringMap; it's a dict subclass with string keys
# Start from "not available" and replace it with the real type only when
# the Jython runtime package can be imported.
PyStringMap = None
try:
    from org.python.core import PyStringMap
except ImportError:
    pass
88
Guido van Rossum533dbcf2003-01-28 17:55:05 +000089# UnicodeType may or may not be exported (normally imported from types)
# When the star-import from types did not provide UnicodeType, define it
# as None so later references to the name still resolve.
if "UnicodeType" not in globals():
    UnicodeType = None
94
Tim Peters22a449a2003-01-27 20:16:36 +000095# Pickle opcodes. See pickletools.py for extensive docs. The listing
96# here is in kind-of alphabetical order of 1-character pickle code.
97# pickletools groups them by purpose.
Guido van Rossumdbb718f2001-09-21 19:22:34 +000098
MARK            = "("   # push special markobject on stack
STOP            = "."   # every pickle ends with STOP
POP             = "0"   # discard topmost stack item
POP_MARK        = "1"   # discard stack top through topmost markobject
DUP             = "2"   # duplicate top stack item
FLOAT           = "F"   # push float object; decimal string argument
INT             = "I"   # push integer or bool; decimal string argument
BININT          = "J"   # push four-byte signed int
BININT1         = "K"   # push 1-byte unsigned int
LONG            = "L"   # push long; decimal string argument
BININT2         = "M"   # push 2-byte unsigned int
NONE            = "N"   # push None
PERSID          = "P"   # push persistent object; id is taken from string arg
BINPERSID       = "Q"   # push persistent object; id is taken from stack
REDUCE          = "R"   # apply callable to argtuple, both on stack
STRING          = "S"   # push string; NL-terminated string argument
BINSTRING       = "T"   # push string; counted binary string argument
SHORT_BINSTRING = "U"   # push string; counted binary string < 256 bytes
UNICODE         = "V"   # push Unicode string; raw-unicode-escaped'd argument
BINUNICODE      = "X"   # push Unicode string; counted UTF-8 string argument
APPEND          = "a"   # append stack top to list below it
BUILD           = "b"   # call __setstate__ or __dict__.update()
GLOBAL          = "c"   # push self.find_class(modname, name); 2 string args
DICT            = "d"   # build a dict from stack items
EMPTY_DICT      = "}"   # push empty dict
APPENDS         = "e"   # extend list on stack by topmost stack slice
GET             = "g"   # push item from memo on stack; index is string arg
BINGET          = "h"   # push item from memo on stack; index is 1-byte arg
INST            = "i"   # build & push class instance
LONG_BINGET     = "j"   # push item from memo on stack; index is 4-byte arg
LIST            = "l"   # build list from topmost stack items
EMPTY_LIST      = "]"   # push empty list
OBJ             = "o"   # build & push class instance
PUT             = "p"   # store stack top in memo; index is string arg
BINPUT          = "q"   # store stack top in memo; index is 1-byte arg
LONG_BINPUT     = "r"   # store stack top in memo; index is 4-byte arg
SETITEM         = "s"   # add key+value pair to dict
TUPLE           = "t"   # build tuple from topmost stack items
EMPTY_TUPLE     = ")"   # push empty tuple
SETITEMS        = "u"   # modify dict by adding topmost key+value pairs
BINFLOAT        = "G"   # push float; arg is 8-byte float encoding

# The next two are complete INT records rather than opcodes; see the INT
# docs in pickletools.py.
TRUE            = "I01\n"
FALSE           = "I00\n"
Guido van Rossum77f6a652002-04-03 22:41:51 +0000143
Guido van Rossum586c9e82003-01-29 06:16:12 +0000144# Protocol 2
Guido van Rossum5a2d8f52003-01-27 21:44:25 +0000145
PROTO    = "\x80"  # identify pickle protocol
NEWOBJ   = "\x81"  # build object by applying cls.__new__ to argtuple
EXT1     = "\x82"  # push object from extension registry; 1-byte index
EXT2     = "\x83"  # push object from extension registry; 2-byte index
EXT4     = "\x84"  # push object from extension registry; 4-byte index
TUPLE1   = "\x85"  # build 1-tuple from stack top
TUPLE2   = "\x86"  # build 2-tuple from two topmost stack items
TUPLE3   = "\x87"  # build 3-tuple from three topmost stack items
NEWTRUE  = "\x88"  # push True
NEWFALSE = "\x89"  # push False
LONG1    = "\x8a"  # push long from < 256 bytes
LONG4    = "\x8b"  # push really big long
Guido van Rossum5a2d8f52003-01-27 21:44:25 +0000158
# Position i holds the opcode that builds a tuple of length i (0 to 3).
_tuplesize2code = [
    EMPTY_TUPLE,
    TUPLE1,
    TUPLE2,
    TUPLE3,
]
160
Guido van Rossuma48061a1995-01-10 00:31:14 +0000161
# Publish every module-level name written in capitals (letters, digits and
# underscores, at least two characters) through __all__ as well.
for x in dir():
    if re.match("[A-Z][A-Z0-9_]+$", x):
        __all__.append(x)
del x
Skip Montanaro23bafc62001-02-18 03:10:09 +0000164
Guido van Rossum1be31752003-01-28 15:19:53 +0000165
166# Pickling machinery
167
Guido van Rossuma48061a1995-01-10 00:31:14 +0000168class Pickler:
169
def __init__(self, file, proto=1):
    """Set up a pickler that writes its data stream to file.

    file must provide a write() method that accepts a single string
    argument; only that bound method is kept.  An open file object, a
    StringIO object, or any custom object with such a method will do.

    proto selects the pickle protocol and must be 0, 1 or 2; any other
    value raises ValueError.  The default is 1 (in previous Python
    versions the default was 0).  Protocol 1 is more efficient than
    protocol 0, and protocol 2 more efficient than protocol 1; protocol 2
    is not the default because older Python versions do not support it.

    NOTE(review): the previous docstring carried an "XXX Protocol 2 is
    not yet implemented" marker -- confirm whether that still applies.
    """
    if proto not in (0, 1, 2):
        raise ValueError("pickle protocol must be 0, 1 or 2")
    self.write = file.write
    self.proto = int(proto)
    # Every protocol from 1 upwards uses the binary opcodes.
    self.bin = proto >= 1
    # Maps id(obj) to a (memo key, obj) pair; see memoize().
    self.memo = {}
    # A true value makes memoize() a no-op.
    self.fast = 0
Guido van Rossuma48061a1995-01-10 00:31:14 +0000195
def clear_memo(self):
    """Forget every object this pickler has seen so far.

    The memo records which objects were already pickled so that shared
    or recursive objects are written by reference rather than by value.
    Emptying it is useful when a pickler is re-used.  The memo dictionary
    is emptied in place, not replaced.
    """
    self.memo.clear()
206
def dump(self, obj):
    """Write a pickled representation of obj to the open file.

    For protocol 2 and above the stream starts with the PROTO opcode
    followed by the protocol number as a single character.  The object
    itself is written by save(), and the stream is closed with STOP.
    """
    proto = self.proto
    if proto >= 2:
        self.write(PROTO + chr(proto))
    self.save(obj)
    self.write(STOP)
Guido van Rossuma48061a1995-01-10 00:31:14 +0000218
def memoize(self, obj):
    """Store an object in the memo and write the matching PUT opcode.

    Does nothing when self.fast is true.

    The Pickler memo maps id(obj) to a 2-tuple (memo key, obj).  The memo
    key is written to the pickle and becomes the key in the Unpickler's
    memo.  The object is stored alongside it so that transient objects
    are kept alive during pickling.

    Using the current memo length as the memo key is just a convention;
    the only requirement is that the keys be unique.  There appears to be
    no advantage to any other scheme, and this one allows the Unpickler
    memo to be implemented as a plain (but growable) array indexed by
    memo key.
    """
    if self.fast:
        return
    ident = id(obj)
    assert ident not in self.memo
    key = len(self.memo)
    self.write(self.put(key))
    self.memo[ident] = key, obj
Jeremy Hylton3422c992003-01-24 19:29:52 +0000240
Tim Petersbb38e302003-01-27 21:25:41 +0000241 # Return a PUT (BINPUT, LONG_BINPUT) opcode string, with argument i.
def put(self, i, pack=struct.pack):
    """Return the opcode string that stores the stack top under memo key i.

    Text mode (self.bin false) yields PUT with the repr of i and a
    newline.  Binary mode yields BINPUT with a one-character argument
    when i < 256, otherwise LONG_BINPUT with i packed as "<i".
    """
    if not self.bin:
        return PUT + repr(i) + '\n'
    if i < 256:
        return BINPUT + chr(i)
    return LONG_BINPUT + pack("<i", i)
Guido van Rossuma48061a1995-01-10 00:31:14 +0000250
Tim Petersbb38e302003-01-27 21:25:41 +0000251 # Return a GET (BINGET, LONG_BINGET) opcode string, with argument i.
def get(self, i, pack=struct.pack):
    """Return the opcode string that pushes the memo entry with key i.

    Text mode (self.bin false) yields GET with the repr of i and a
    newline.  Binary mode yields BINGET with a one-character argument
    when i < 256, otherwise LONG_BINGET with i packed as "<i".
    """
    if not self.bin:
        return GET + repr(i) + '\n'
    if i < 256:
        return BINGET + chr(i)
    return LONG_BINGET + pack("<i", i)
Tim Peters2344fae2001-01-15 00:50:52 +0000260
def save(self, obj):
    """Pickle obj, choosing the first strategy that applies.

    The strategies are tried in this order:

    1. a persistent id returned by self.persistent_id(obj);
    2. a memo hit, written as a GET reference;
    3. a save_* method registered in self.dispatch for type(obj);
    4. save_global(), when type(obj) is a subclass of TypeType;
    5. a reduction function from copy_reg.dispatch_table or the type's
       __reduce__ (for protocol 2, save_newobj() replaces the default
       object.__reduce__).

    Raises PicklingError when no reduction function is found or when the
    reduction function returns something other than a string or a tuple
    of two to five elements.
    """
    # Check for persistent id (defined by a subclass).  Only None means
    # "no persistent id": a falsy id such as 0 or "" is still an id and
    # must not be silently pickled by value.
    pid = self.persistent_id(obj)
    if pid is not None:
        self.save_pers(pid)
        return

    # Check the memo
    memo_entry = self.memo.get(id(obj))
    if memo_entry:
        self.write(self.get(memo_entry[0]))
        return

    # Check the type dispatch table
    t = type(obj)
    saver = self.dispatch.get(t)
    if saver:
        saver(self, obj)  # Call unbound method with explicit self
        return

    # Check for a class with a custom metaclass; treat as regular class
    try:
        issc = issubclass(t, TypeType)
    except TypeError:  # t is not a class (old Boost; see SF #502085)
        issc = 0
    if issc:
        self.save_global(obj)
        return

    # Check copy_reg.dispatch_table
    reducer = dispatch_table.get(t)
    if not reducer:
        # Check for a __reduce__ method.
        # Subtle: get the unbound method from the class, so that
        # protocol 2 can override the default __reduce__ that all
        # classes inherit from object.  This has the added
        # advantage that the call always has the form reduce(obj)
        reducer = getattr(t, "__reduce__", None)
        if self.proto >= 2:
            # Protocol 2 can do better than the default __reduce__
            if reducer is object.__reduce__:
                reducer = None
            if not reducer:
                self.save_newobj(obj)
                return
    if not reducer:
        raise PicklingError("Can't pickle %r object: %r" %
                            (t.__name__, obj))
    rv = reducer(obj)

    # Check for string returned by reduce(), meaning "save as global"
    if type(rv) is StringType:
        self.save_global(obj, rv)
        return

    # Assert that reduce() returned a tuple
    if type(rv) is not TupleType:
        raise PicklingError("%s must return string or tuple" % reducer)

    # Assert that it returned an appropriately sized tuple
    size = len(rv)
    if not (2 <= size <= 5):
        raise PicklingError("Tuple returned by %s must have "
                            "two to five elements" % reducer)

    # Save the reduce() output and finally memoize the object
    self.save_reduce(obj=obj, *rv)
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000328
def persistent_id(self, obj):
    """Hook for subclasses: return the persistent id of obj.

    This base implementation has no persistent ids and always returns
    None, whatever obj is.
    """
    return None
332
def save_pers(self, pid):
    """Write a reference to the persistent id pid.

    In text mode the id is written inline as PERSID, str(pid) and a
    newline.  In binary mode the id is pickled with save() and followed
    by BINPERSID.
    """
    if not self.bin:
        self.write(PERSID + str(pid) + '\n')
        return
    self.save(pid)
    self.write(BINPERSID)
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000340
def save_reduce(self, func, args, state=None,
                listitems=None, dictitems=None, obj=None):
    """Pickle the pieces of a reduce() result.

    This API is called by some subclasses.

    func      -- callable that rebuilds the object; must be callable.
    args      -- tuple of arguments for func.  None is tolerated with a
                 DeprecationWarning (the __basicnew__ special case).
    state     -- if not None, pickled and followed by BUILD.
    listitems -- if not None, passed to self._batch_appends().
    dictitems -- if not None, passed to self._batch_setitems().
    obj       -- if not None, the object being pickled; it is memoized
                 once the REDUCE or NEWOBJ opcode has been written.

    Raises PicklingError when args is neither a tuple nor None, when func
    is not callable, or when the __newobj__ checks below fail.
    """
    # args must be a tuple or None
    if args is None:
        # A hack for Jim Fulton's ExtensionClass, now deprecated.
        # See load_reduce()
        warnings.warn("__basicnew__ special case is deprecated",
                      DeprecationWarning)
    elif not isinstance(args, TupleType):
        raise PicklingError(
            "args from reduce() should be a tuple")

    # func must be callable
    if not callable(func):
        raise PicklingError("func from reduce should be callable")

    save = self.save
    write = self.write

    # Protocol 2 special case: if func's name is __newobj__, use NEWOBJ
    use_newobj = (self.proto >= 2 and
                  getattr(func, "__name__", "") == "__newobj__")
    if use_newobj:
        # A __reduce__ implementation can direct protocol 2 to use the
        # more efficient NEWOBJ opcode while still letting protocols 0
        # and 1 work normally.  For that, the function returned by
        # __reduce__ should be called __newobj__ and its first argument
        # should be a new-style class.  pickle cannot verify it, but the
        # implementation of __newobj__ should be:
        #
        # def __newobj__(cls, *args):
        #     return cls.__new__(cls, *args)
        #
        # Protocols 0 and 1 pickle a reference to __newobj__.  Protocol 2
        # (and above) pickles a reference to cls, the remaining args
        # tuple and the NEWOBJ code, which calls cls.__new__(cls, *args)
        # at unpickling time (see load_newobj below).  If __reduce__
        # returns a three-tuple, the state from the third item is pickled
        # regardless of the protocol, calling __setstate__ at unpickling
        # time (see load_build below).
        #
        # No standard __newobj__ implementation exists; you have to
        # provide your own.  This enforces compatibility with Python 2.2
        # (pickles written using protocol 0 or 1 in Python 2.3 should be
        # unpicklable by Python 2.2).
        cls = args[0]
        if not hasattr(cls, "__new__"):
            raise PicklingError(
                "args[0] from __newobj__ args has no __new__")
        if obj is not None and cls is not obj.__class__:
            raise PicklingError(
                "args[0] from __newobj__ args has the wrong class")
        args = args[1:]
        save(cls)
        save(args)
        write(NEWOBJ)
    else:
        save(func)
        save(args)
        write(REDUCE)

    if obj is not None:
        self.memoize(obj)

    # More special cases (these work with older protocols as well): when
    # __reduce__ returns a tuple with 4 or 5 items, the 4th and 5th item
    # should be iterators that provide list items and dict items (as
    # (key, value) tuples), or None.
    if listitems is not None:
        self._batch_appends(listitems)

    if dictitems is not None:
        self._batch_setitems(dictitems)

    if state is not None:
        save(state)
        write(BUILD)
424
def save_newobj(self, obj):
    """Save a new-style class instance, using protocol 2.

    Writes type(obj), the tuple returned by obj.__getnewargs__() (or an
    empty tuple) and NEWOBJ, then memoizes obj.  List and dict contents
    are appended through _batch_appends() / _batch_setitems().  Finally
    the state, if any, is pickled and followed by BUILD.

    The state comes from __getstate__() when available.  Otherwise it is
    the instance __dict__ (None if missing or empty), combined with a
    dict of slot values as (state, slots) when the type has filled
    slots.

    A TypeError raised by __getstate__() is re-raised unless it is the
    specific "__slots__ without __getstate__" error, in which case the
    default state is used instead.
    """
    assert self.proto >= 2  # This only works for protocol 2
    t = type(obj)
    getnewargs = getattr(obj, "__getnewargs__", None)
    if getnewargs:
        args = getnewargs()  # This better not reference obj
    else:
        args = ()

    save = self.save
    write = self.write

    save(t)
    save(args)
    write(NEWOBJ)
    self.memoize(obj)

    if isinstance(obj, list):
        self._batch_appends(iter(obj))
    elif isinstance(obj, dict):
        self._batch_setitems(obj.iteritems())

    getstate = getattr(obj, "__getstate__", None)

    if getstate:
        # A class may define both __getstate__ and __getnewargs__.
        # If they are the same function, we ignore __getstate__.
        # This is for the benefit of protocols 0 and 1, which don't
        # use __getnewargs__.  Note that the only way to make them
        # the same function is something like this:
        #
        # class C(object):
        #     def __getstate__(self):
        #         return ...
        #     __getnewargs__ = __getstate__
        #
        # No tricks are needed to ignore __setstate__; it simply
        # won't be called when we don't generate BUILD.
        # Also note that when __getnewargs__ and __getstate__ are
        # the same function, we don't do the default thing of
        # looking for __dict__ and slots either -- it is assumed
        # that __getnewargs__ returns all the state there is
        # (which should be a safe assumption since __getstate__
        # returns the *same* state).
        if getstate == getnewargs:
            return

        try:
            state = getstate()
        except TypeError:
            # XXX Catch generic exception caused by __slots__
            err = sys.exc_info()[1]
            if str(err) != ("a class that defines __slots__ "
                            "without defining __getstate__ "
                            "cannot be pickled"):
                # Not that specific exception: propagate it unchanged,
                # without writing anything to stdout.
                raise
            getstate = None

    if not getstate:
        state = getattr(obj, "__dict__", None)
        if not state:
            state = None
        # If there are slots, the state becomes a tuple of two
        # items: the first item the regular __dict__ or None, and
        # the second a dict mapping slot names to slot values
        names = _slotnames(t)
        if names:
            slots = {}
            nil = []  # unique marker for "slot is not set"
            for name in names:
                value = getattr(obj, name, nil)
                if value is not nil:
                    slots[name] = value
            if slots:
                state = (state, slots)

    if state is not None:
        save(state)
        write(BUILD)
Guido van Rossum54fb1922003-01-28 18:22:35 +0000505
Guido van Rossumbc64e222003-01-28 16:34:19 +0000506 # Methods below this point are dispatched through the dispatch table
507
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000508 dispatch = {}
509
def save_none(self, obj):
    """Write the NONE opcode; obj itself is not inspected."""
    self.write(NONE)
512 dispatch[NoneType] = save_none
513
def save_bool(self, obj):
    """Write the representation of a bool.

    Protocol 2 and above use the NEWTRUE / NEWFALSE opcodes; older
    protocols write the TRUE / FALSE records.
    """
    if self.proto >= 2:
        if obj:
            code = NEWTRUE
        else:
            code = NEWFALSE
    else:
        if obj:
            code = TRUE
        else:
            code = FALSE
    self.write(code)
Guido van Rossum77f6a652002-04-03 22:41:51 +0000519 dispatch[bool] = save_bool
520
def save_int(self, obj, pack=struct.pack):
    """Write an int using the most compact opcode available.

    In binary mode, values 0..0xff use BININT1, values up to 0xffff use
    BININT2 (low byte first), and any other value that fits a signed
    4-byte two's-complement int uses BININT.  Everything else, and all
    values in text mode, is written as INT with the repr of the value
    and a newline.
    """
    if self.bin:
        # If the int is small enough to fit in a signed 4-byte 2's-comp
        # format, we can store it more efficiently than the general case.
        # First one- and two-byte unsigned ints:
        if 0 <= obj <= 0xff:
            self.write(BININT1 + chr(obj))
            return
        if 0 <= obj <= 0xffff:
            self.write(BININT2 + chr(obj & 0xff) + chr(obj >> 8))
            return
        # Next check for 4-byte signed ints.  Python's shift
        # sign-extends, so the result is 0 or -1 exactly when all high
        # bits are copies of bit 2**31.
        if (obj >> 31) in (0, -1):
            self.write(BININT + pack("<i", obj))
            return
    # Text pickle, or int too big to fit in signed 4-byte format.
    self.write(INT + repr(obj) + '\n')
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000543 dispatch[IntType] = save_int
544
def save_long(self, obj, pack=struct.pack):
    """Write a long.

    Protocol 2 and above write the bytes returned by encode_long(obj):
    LONG1 with a one-character length when there are fewer than 256
    bytes, otherwise LONG4 with the length packed as "<i".  Older
    protocols write LONG followed by the repr of obj and a newline.

    Exactly one representation is written per call.
    """
    if self.proto >= 2:
        encoded = encode_long(obj)
        n = len(encoded)
        if n < 256:
            self.write(LONG1 + chr(n) + encoded)
        else:
            self.write(LONG4 + pack("<i", n) + encoded)
        # Stop here: falling through would write the text form as well
        # and push the same value onto the unpickler's stack twice.
        return
    self.write(LONG + repr(obj) + '\n')
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000554 dispatch[LongType] = save_long
555
def save_float(self, obj, pack=struct.pack):
    """Write a float.

    Binary mode writes BINFLOAT followed by obj packed as ">d"; text
    mode writes FLOAT followed by the repr of obj and a newline.
    """
    if not self.bin:
        self.write(FLOAT + repr(obj) + '\n')
        return
    self.write(BINFLOAT + pack('>d', obj))
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000561 dispatch[FloatType] = save_float
562
def save_string(self, obj, pack=struct.pack):
    """Write a string and memoize it.

    Binary mode uses SHORT_BINSTRING with a one-character length for
    strings shorter than 256 characters and BINSTRING with the length
    packed as "<i" otherwise.  Text mode writes STRING followed by the
    repr of obj and a newline.
    """
    if self.bin:
        size = len(obj)
        if size < 256:
            record = SHORT_BINSTRING + chr(size) + obj
        else:
            record = BINSTRING + pack("<i", size) + obj
    else:
        record = STRING + repr(obj) + '\n'
    self.write(record)
    self.memoize(obj)
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000573 dispatch[StringType] = save_string
574
def save_unicode(self, obj, pack=struct.pack):
    """Write a Unicode string and memoize it.

    Binary mode writes BINUNICODE, the length of the UTF-8 encoding
    packed as "<i", and the encoded data.  Text mode writes UNICODE
    followed by the raw-unicode-escape encoding and a newline, after
    replacing backslashes and newlines with \\u005c and \\u000a so the
    record stays on one line.

    The object memoized is always obj itself, never an escaped copy.
    """
    if self.bin:
        encoding = obj.encode('utf-8')
        n = len(encoding)
        self.write(BINUNICODE + pack("<i", n) + encoding)
    else:
        # Escape into a separate name.  Rebinding obj here would make
        # memoize() record the temporary escaped string, so later
        # references to the original object would miss the memo.
        escaped = obj.replace("\\", "\\u005c")
        escaped = escaped.replace("\n", "\\u000a")
        self.write(UNICODE + escaped.encode('raw-unicode-escape') + '\n')
    self.memoize(obj)
Guido van Rossumb5f2f1b2000-03-10 23:20:09 +0000585 dispatch[UnicodeType] = save_unicode
586
if StringType == UnicodeType:
    # This is true for Jython
    def save_string(self, obj, pack=struct.pack):
        """Write a string that may hold Unicode data, and memoize it.

        obj.isunicode() decides between the string and the Unicode
        opcodes.  In binary mode Unicode data is UTF-8 encoded and
        written as BINUNICODE; other strings use SHORT_BINSTRING (fewer
        than 256 characters) or BINSTRING.  In text mode Unicode data is
        written as UNICODE with backslashes and newlines escaped, other
        strings as STRING with their repr.

        The object memoized is always obj itself, never an encoded or
        escaped copy.
        """
        is_unicode = obj.isunicode()

        # The transformed text lives in "data" so that obj still names
        # the object being pickled when memoize() is called below.
        if self.bin:
            if is_unicode:
                data = obj.encode("utf-8")
            else:
                data = obj
            size = len(data)
            if size < 256 and not is_unicode:
                self.write(SHORT_BINSTRING + chr(size) + data)
            else:
                header = pack("<i", size)
                if is_unicode:
                    self.write(BINUNICODE + header + data)
                else:
                    self.write(BINSTRING + header + data)
        else:
            if is_unicode:
                data = obj.replace("\\", "\\u005c")
                data = data.replace("\n", "\\u000a")
                data = data.encode('raw-unicode-escape')
                self.write(UNICODE + data + '\n')
            else:
                self.write(STRING + repr(obj) + '\n')
        self.memoize(obj)
    dispatch[StringType] = save_string
Tim Peters658cba62001-02-09 20:06:00 +0000614
def save_tuple(self, obj):
    """Write the opcodes for the tuple obj.

    The opcodes chosen depend on self.proto and on len(obj); a tuple that
    turns out to contain itself is fetched from the memo instead of being
    built a second time.
    """
    write = self.write
    proto = self.proto

    n = len(obj)
    if n == 0 and proto:
        # Any non-zero protocol: a single opcode, and nothing is memoized.
        write(EMPTY_TUPLE)
        return

    save = self.save
    memo = self.memo
    if n <= 3 and proto >= 2:
        # n is 1, 2 or 3 here (0 was handled above).  Save the elements,
        # then write the opcode that _tuplesize2code holds for n.
        for element in obj:
            save(element)
        # Subtle.  Same as in the big comment below.
        if id(obj) in memo:
            get = self.get(memo[id(obj)][0])
            # No MARK was written on this path, so pop the n elements.
            write(POP * n + get)
        else:
            write(_tuplesize2code[n])
            self.memoize(obj)
        return

    # proto 0, or proto 1 and tuple isn't empty, or proto > 1 and tuple
    # has more than 3 elements.
    write(MARK)
    for element in obj:
        save(element)

    if n and id(obj) in memo:
        # Subtle.  obj was not in memo when we entered save_tuple(), so
        # the process of saving the tuple's elements must have saved
        # the tuple itself:  the tuple is recursive.  The proper action
        # now is to throw away everything we put on the stack, and
        # simply GET the tuple (it's already constructed).  This check
        # could have been done in the "for element" loop instead, but
        # recursive tuples are a rare thing.
        get = self.get(memo[id(obj)][0])
        if proto:
            write(POP_MARK + get)
        else:   # proto 0 -- POP_MARK not available
            # n elements plus the MARK written above.
            write(POP * (n+1) + get)
        return

    # No recursion (including the empty-tuple case for protocol 0).
    self.write(TUPLE)
    if obj:                      # No need to memoize empty tuple
        self.memoize(obj)
Jeremy Hylton3422c992003-01-24 19:29:52 +0000663
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000664 dispatch[TupleType] = save_tuple
665
Tim Petersa6ae9a22003-01-28 16:58:41 +0000666 # save_empty_tuple() isn't used by anything in Python 2.3. However, I
667 # found a Pickler subclass in Zope3 that calls it, so it's not harmless
668 # to remove it.
def save_empty_tuple(self, obj):
    """Write the EMPTY_TUPLE opcode; obj itself is not inspected."""
    emit = self.write
    emit(EMPTY_TUPLE)
671
def save_list(self, obj):
    """Write an empty list, memoize obj, then append its items in batches."""
    emit = self.write

    if not self.bin:
        # proto 0 -- can't use EMPTY_LIST
        emit(MARK + LIST)
    else:
        emit(EMPTY_LIST)

    self.memoize(obj)
    item_iter = iter(obj)
    self._batch_appends(item_iter)
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000682
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000683 dispatch[ListType] = save_list
684
Guido van Rossum25cb7df2003-01-31 18:53:21 +0000685 _BATCHSIZE = 1000
686
def _batch_appends(self, items):
    """Save every object produced by the iterator items onto a list.

    In non-binary mode each object is followed by APPEND.  In binary mode
    the objects are grouped into runs of at most self._BATCHSIZE; a run of
    several objects is written as MARK ... APPENDS, a run of exactly one
    as a plain APPEND.
    """
    save = self.save
    write = self.write

    if not self.bin:
        for element in items:
            save(element)
            write(APPEND)
        return

    limit = self._BATCHSIZE
    while items is not None:
        chunk = []
        while len(chunk) < limit:
            try:
                chunk.append(items.next())
            except StopIteration:
                # Exhausted: flush what we have and leave the outer loop.
                items = None
                break
        count = len(chunk)
        if count == 1:
            save(chunk[0])
            write(APPEND)
        elif count > 1:
            write(MARK)
            for element in chunk:
                save(element)
            write(APPENDS)
        # an empty chunk writes nothing
717
def save_dict(self, obj):
    """Write an empty dict, memoize obj, then set its items in batches."""
    emit = self.write

    if not self.bin:
        # proto 0 -- can't use EMPTY_DICT
        emit(MARK + DICT)
    else:
        emit(EMPTY_DICT)

    self.memoize(obj)
    pairs = obj.iteritems()
    self._batch_setitems(pairs)
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000728
729 dispatch[DictionaryType] = save_dict
Jeremy Hylton2b9d0291998-05-27 22:38:22 +0000730 if not PyStringMap is None:
731 dispatch[PyStringMap] = save_dict
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000732
def _batch_setitems(self, items):
    """Save every (key, value) pair produced by the iterator items.

    In non-binary mode each pair is followed by SETITEM.  In binary mode
    the pairs are grouped into runs of at most self._BATCHSIZE; a run of
    several pairs is written as MARK ... SETITEMS, a run of exactly one
    as a plain SETITEM.
    """
    save = self.save
    write = self.write

    if not self.bin:
        for key, value in items:
            save(key)
            save(value)
            write(SETITEM)
        return

    limit = self._BATCHSIZE
    while items is not None:
        chunk = []
        while len(chunk) < limit:
            try:
                chunk.append(items.next())
            except StopIteration:
                # Exhausted: flush what we have and leave the outer loop.
                items = None
                break
        count = len(chunk)
        if count == 1:
            key, value = chunk[0]
            save(key)
            save(value)
            write(SETITEM)
        elif count > 1:
            write(MARK)
            for key, value in chunk:
                save(key)
                save(value)
            write(SETITEMS)
        # an empty chunk writes nothing
767
def save_inst(self, obj):
    """Write the opcodes for the instance obj.

    The class and the result of obj.__getinitargs__() (if defined) are
    written as MARK ... OBJ in binary mode or MARK ... INST otherwise.
    The state, taken from obj.__getstate__() or else obj.__dict__, then
    follows with a BUILD opcode.
    """
    cls = obj.__class__

    memo = self.memo
    write = self.write
    save = self.save

    args = ()
    if hasattr(obj, '__getinitargs__'):
        args = obj.__getinitargs__()
        len(args) # XXX Assert it's a sequence
        _keep_alive(args, memo)

    write(MARK)

    if not self.bin:
        for arg in args:
            save(arg)
        write(INST + cls.__module__ + '\n' + cls.__name__ + '\n')
    else:
        save(cls)
        for arg in args:
            save(arg)
        write(OBJ)

    self.memoize(obj)

    try:
        getstate = obj.__getstate__
    except AttributeError:
        state = obj.__dict__
    else:
        state = getstate()
        # The state may be a temporary; keep it alive while its id is
        # in use.
        _keep_alive(state, memo)
    save(state)
    write(BUILD)
Tim Peters3b769832003-01-28 03:51:36 +0000805
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000806 dispatch[InstanceType] = save_inst
807
def save_global(self, obj, name=None, pack=struct.pack):
    """Pickle obj by reference, as module name plus attribute name.

    name defaults to obj.__name__.  PicklingError is raised unless
    importing the module and looking up name yields obj itself.  With
    protocol 2 and a code registered in extension_registry, an
    EXT1/EXT2/EXT4 opcode is written and obj is not memoized; otherwise
    GLOBAL is written and obj is memoized.
    """
    write = self.write

    if name is None:
        name = obj.__name__

    module = getattr(obj, "__module__", None)
    if module is None:
        module = whichmodule(obj, name)

    try:
        __import__(module)
        found = getattr(sys.modules[module], name)
    except (ImportError, KeyError, AttributeError):
        raise PicklingError(
            "Can't pickle %r: it's not found as %s.%s" %
            (obj, module, name))
    if found is not obj:
        raise PicklingError(
            "Can't pickle %r: it's not the same object as %s.%s" %
            (obj, module, name))

    if self.proto >= 2:
        code = extension_registry.get((module, name))
        if code:
            assert code > 0
            if code <= 0xff:
                write(EXT1 + chr(code))
            elif code <= 0xffff:
                # two bytes, low byte first
                write("%c%c%c" % (EXT2, code&0xff, code>>8))
            else:
                write(EXT4 + pack("<i", code))
            return

    write(GLOBAL + module + '\n' + name + '\n')
    self.memoize(obj)
Tim Peters3b769832003-01-28 03:51:36 +0000847
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000848 dispatch[ClassType] = save_global
849 dispatch[FunctionType] = save_global
850 dispatch[BuiltinFunctionType] = save_global
Tim Peters6d6c1a32001-08-02 04:15:00 +0000851 dispatch[TypeType] = save_global
Guido van Rossum0c891ce1995-03-14 15:09:05 +0000852
Guido van Rossum1be31752003-01-28 15:19:53 +0000853# Pickling helpers
Guido van Rossuma48061a1995-01-10 00:31:14 +0000854
Guido van Rossumac5b5d22003-01-28 22:01:16 +0000855def _slotnames(cls):
856 """Return a list of slot names for a given class.
857
858 This needs to find slots defined by the class and its bases, so we
859 can't simply return the __slots__ attribute. We must walk down
860 the Method Resolution Order and concatenate the __slots__ of each
861 class found there. (This assumes classes don't modify their
862 __slots__ attribute to misrepresent their slots after the class is
863 defined.)
864 """
865 if not hasattr(cls, "__slots__"):
866 return []
867 names = []
868 for c in cls.__mro__:
869 if "__slots__" in c.__dict__:
870 names += list(c.__dict__["__slots__"])
871 return names
872
Guido van Rossum5ed5c4c1997-09-03 00:23:54 +0000873def _keep_alive(x, memo):
874 """Keeps a reference to the object x in the memo.
875
876 Because we remember objects by their id, we have
877 to assure that possibly temporary objects are kept
878 alive by referencing them.
879 We store a reference at the id of the memo, which should
880 normally not be used unless someone tries to deepcopy
881 the memo itself...
882 """
883 try:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000884 memo[id(memo)].append(x)
Guido van Rossum5ed5c4c1997-09-03 00:23:54 +0000885 except KeyError:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000886 # aha, this is the first one :-)
887 memo[id(memo)]=[x]
Guido van Rossum5ed5c4c1997-09-03 00:23:54 +0000888
889
Tim Petersc0c12b52003-01-29 00:56:17 +0000890# A cache for whichmodule(), mapping a function object to the name of
891# the module in which the function was found.
892
Jeremy Hyltonf0cfdf72002-09-19 23:00:12 +0000893classmap = {} # called classmap for backwards compatibility
Guido van Rossuma48061a1995-01-10 00:31:14 +0000894
Jeremy Hyltonf0cfdf72002-09-19 23:00:12 +0000895def whichmodule(func, funcname):
896 """Figure out the module in which a function occurs.
Guido van Rossuma48061a1995-01-10 00:31:14 +0000897
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000898 Search sys.modules for the module.
899 Cache in classmap.
900 Return a module name.
Tim Petersc0c12b52003-01-29 00:56:17 +0000901 If the function cannot be found, return "__main__".
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000902 """
Jeremy Hylton4f0dcc92003-01-31 18:33:18 +0000903 # Python functions should always get an __module__ from their globals.
904 mod = getattr(func, "__module__", None)
905 if mod is not None:
906 return mod
Jeremy Hyltonf0cfdf72002-09-19 23:00:12 +0000907 if func in classmap:
908 return classmap[func]
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000909
910 for name, module in sys.modules.items():
Jeremy Hyltonf0cfdf72002-09-19 23:00:12 +0000911 if module is None:
Jeremy Hylton065a5ab2002-09-19 22:57:26 +0000912 continue # skip dummy package entries
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000913 if name != '__main__' and \
Jeremy Hyltonf0cfdf72002-09-19 23:00:12 +0000914 hasattr(module, funcname) and \
915 getattr(module, funcname) is func:
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000916 break
917 else:
918 name = '__main__'
Jeremy Hyltonf0cfdf72002-09-19 23:00:12 +0000919 classmap[func] = name
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000920 return name
Guido van Rossuma48061a1995-01-10 00:31:14 +0000921
922
Guido van Rossum1be31752003-01-28 15:19:53 +0000923# Unpickling machinery
924
Guido van Rossuma48061a1995-01-10 00:31:14 +0000925class Unpickler:
926
def __init__(self, file):
    """Set up an unpickler that reads a pickle data stream from file.

    Whether the stream was written in binary mode is determined
    automatically, so no flag is needed as in the Pickler class
    factory.

    file must provide two methods: read(), which takes an integer
    argument, and readline(), which requires no arguments.  Both should
    return a string.  A file object opened for reading, a StringIO
    object, or any other custom object that meets this interface will
    do.
    """
    self.memo = {}
    self.read = file.read
    self.readline = file.readline
Guido van Rossuma48061a1995-01-10 00:31:14 +0000943
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000944 def load(self):
Guido van Rossum3a41c612003-01-28 15:10:22 +0000945 """Read a pickled object representation from the open file.
Raymond Hettingeraef22fb2002-05-29 16:18:42 +0000946
Guido van Rossum3a41c612003-01-28 15:10:22 +0000947 Return the reconstituted object hierarchy specified in the file.
Raymond Hettingeraef22fb2002-05-29 16:18:42 +0000948 """
Jeremy Hylton20747fa2001-11-09 16:15:04 +0000949 self.mark = object() # any new unique object
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000950 self.stack = []
951 self.append = self.stack.append
952 read = self.read
953 dispatch = self.dispatch
954 try:
955 while 1:
956 key = read(1)
957 dispatch[key](self)
Guido van Rossumff871742000-12-13 18:11:56 +0000958 except _Stop, stopinst:
959 return stopinst.value
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000960
Tim Petersc23d18a2003-01-28 01:41:51 +0000961 # Return largest index k such that self.stack[k] is self.mark.
962 # If the stack doesn't contain a mark, eventually raises IndexError.
963 # This could be sped by maintaining another stack, of indices at which
964 # the mark appears. For that matter, the latter stack would suffice,
965 # and we wouldn't need to push mark objects on self.stack at all.
966 # Doing so is probably a good thing, though, since if the pickle is
967 # corrupt (or hostile) we may get a clue from finding self.mark embedded
968 # in unpickled objects.
def marker(self):
    """Return the largest index k such that self.stack[k] is self.mark."""
    items = self.stack
    sentinel = self.mark
    pos = len(items) - 1
    # Walk down from the top of the stack.  Without a mark the index
    # eventually runs off the list and the lookup raises IndexError.
    while items[pos] is not sentinel:
        pos -= 1
    return pos
975
976 dispatch = {}
977
def load_eof(self):
    """Handler for the empty-string key: always raises EOFError."""
    raise EOFError()
980 dispatch[''] = load_eof
981
def load_proto(self):
    """Read the one-byte protocol number; reject anything outside 0..2."""
    version = ord(self.read(1))
    if version < 0 or version > 2:
        raise ValueError("unsupported pickle protocol: %d" % version)
986 dispatch[PROTO] = load_proto
987
def load_persid(self):
    """Read a persistent id from a line and push persistent_load(id)."""
    line = self.readline()
    # drop the last character of the line
    obj = self.persistent_load(line[:-1])
    self.append(obj)
991 dispatch[PERSID] = load_persid
992
def load_binpersid(self):
    """Pop a persistent id off the stack and push persistent_load(id)."""
    stack = self.stack
    pid = stack.pop()
    obj = self.persistent_load(pid)
    self.append(obj)
996 dispatch[BINPERSID] = load_binpersid
997
def load_none(self):
    """Push None."""
    push = self.append
    push(None)
1000 dispatch[NONE] = load_none
1001
def load_false(self):
    """Push False."""
    push = self.append
    push(False)
1004 dispatch[NEWFALSE] = load_false
1005
def load_true(self):
    """Push True."""
    push = self.append
    push(True)
1008 dispatch[NEWTRUE] = load_true
1009
def load_int(self):
    """Read a line and push the bool, int or long it spells."""
    text = self.readline()
    # The text after the opcode character of FALSE and TRUE stands for
    # the two booleans; the line is compared whole, newline included.
    if text == FALSE[1:]:
        self.append(False)
        return
    if text == TRUE[1:]:
        self.append(True)
        return
    try:
        number = int(text)
    except ValueError:
        number = long(text)
    self.append(number)
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001022 dispatch[INT] = load_int
1023
def load_binint(self):
    """Read four bytes and push them decoded by mloads as an 'i' value."""
    raw = self.read(4)
    self.append(mloads('i' + raw))
1026 dispatch[BININT] = load_binint
1027
def load_binint1(self):
    """Read one byte and push its ordinal."""
    value = ord(self.read(1))
    self.append(value)
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001030 dispatch[BININT1] = load_binint1
1031
def load_binint2(self):
    """Read two bytes, pad them to four with zero bytes, push the int."""
    raw = self.read(2)
    self.append(mloads('i' + raw + '\000\000'))
1034 dispatch[BININT2] = load_binint2
Tim Peters2344fae2001-01-15 00:50:52 +00001035
def load_long(self):
    """Read a line and push it converted by long() with base 0."""
    text = self.readline()[:-1]
    self.append(long(text, 0))
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001038 dispatch[LONG] = load_long
1039
def load_long1(self):
    """Read a one-byte length n, then n bytes, and push the decoded long."""
    n = ord(self.read(1))
    data = self.read(n)
    # The value has to go on the stack; a value returned from a handler
    # is discarded by the dispatch loop in load().
    self.append(decode_long(data))
1044 dispatch[LONG1] = load_long1
1045
def load_long4(self):
    """Read a four-byte length n, then n bytes, and push the decoded long."""
    n = mloads('i' + self.read(4))
    data = self.read(n)
    # The value has to go on the stack; a value returned from a handler
    # is discarded by the dispatch loop in load().
    self.append(decode_long(data))
1050 dispatch[LONG4] = load_long4
1051
def load_float(self):
    """Read a line and push it converted by float()."""
    text = self.readline()[:-1]
    self.append(float(text))
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001054 dispatch[FLOAT] = load_float
1055
def load_binfloat(self, unpack=struct.unpack):
    """Read eight bytes and push them unpacked as a big-endian double."""
    raw = self.read(8)
    (value,) = unpack('>d', raw)
    self.append(value)
1058 dispatch[BINFLOAT] = load_binfloat
1059
def load_string(self):
    """Read a quoted string literal from a line and push its value.

    The text must start and end with the same quote character, single or
    double.  The quotes are stripped and the rest is decoded with the
    "string-escape" codec.

    Raises ValueError("insecure string pickle") when the text is not
    enclosed in a matching pair of quotes.
    """
    rep = self.readline()[:-1]
    for q in "\"'": # double or single quote
        if rep.startswith(q):
            # A lone quote character both starts and ends with q, so the
            # length has to be checked too: there must be an opening and
            # a separate closing quote.
            if len(rep) < 2 or not rep.endswith(q):
                raise ValueError("insecure string pickle")
            rep = rep[len(q):-len(q)]
            break
    else:
        raise ValueError("insecure string pickle")
    self.append(rep.decode("string-escape"))
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001071 dispatch[STRING] = load_string
1072
def load_binstring(self):
    """Read a four-byte length, then push that many bytes as a string."""
    size = mloads('i' + self.read(4))
    data = self.read(size)
    self.append(data)
1076 dispatch[BINSTRING] = load_binstring
1077
def load_unicode(self):
    """Read a line and push it decoded as raw-unicode-escape."""
    text = self.readline()[:-1]
    self.append(unicode(text, 'raw-unicode-escape'))
1080 dispatch[UNICODE] = load_unicode
1081
def load_binunicode(self):
    """Read a four-byte length, then push that many bytes decoded as UTF-8."""
    size = mloads('i' + self.read(4))
    data = self.read(size)
    self.append(unicode(data, 'utf-8'))
1085 dispatch[BINUNICODE] = load_binunicode
1086
def load_short_binstring(self):
    """Read a one-byte length, then push that many bytes as a string."""
    size = ord(self.read(1))
    data = self.read(size)
    self.append(data)
1090 dispatch[SHORT_BINSTRING] = load_short_binstring
1091
def load_tuple(self):
    """Replace the topmost mark and everything above it with one tuple."""
    k = self.marker()
    stack = self.stack
    items = tuple(stack[k+1:])
    stack[k:] = [items]
1095 dispatch[TUPLE] = load_tuple
1096
def load_empty_tuple(self):
    """Push an empty tuple."""
    stack = self.stack
    stack.append(())
1099 dispatch[EMPTY_TUPLE] = load_empty_tuple
1100
def load_tuple1(self):
    """Replace the top stack item with a 1-tuple holding it."""
    stack = self.stack
    top = stack[-1]
    stack[-1] = (top,)
1103 dispatch[TUPLE1] = load_tuple1
1104
def load_tuple2(self):
    """Replace the top two stack items with a 2-tuple holding them."""
    stack = self.stack
    # Fetch both items before touching the stack, so a short stack
    # raises before anything is modified.
    pair = (stack[-2], stack[-1])
    stack[-2:] = [pair]
1107 dispatch[TUPLE2] = load_tuple2
1108
def load_tuple3(self):
    """Replace the top three stack items with a 3-tuple holding them."""
    stack = self.stack
    # Fetch all three items before touching the stack, so a short stack
    # raises before anything is modified.
    triple = (stack[-3], stack[-2], stack[-1])
    stack[-3:] = [triple]
1111 dispatch[TUPLE3] = load_tuple3
1112
def load_empty_list(self):
    """Push a new empty list."""
    stack = self.stack
    stack.append([])
1115 dispatch[EMPTY_LIST] = load_empty_list
1116
def load_empty_dictionary(self):
    """Push a new empty dictionary."""
    stack = self.stack
    stack.append({})
1119 dispatch[EMPTY_DICT] = load_empty_dictionary
1120
def load_list(self):
    """Replace the topmost mark and everything above it with one list."""
    k = self.marker()
    stack = self.stack
    items = stack[k+1:]
    stack[k:] = [items]
1124 dispatch[LIST] = load_list
1125
def load_dict(self):
    """Replace the topmost mark and the items above it with one dict.

    The items above the mark are taken as alternating keys and values.
    """
    k = self.marker()
    items = self.stack[k+1:]
    result = {}
    pos = 0
    count = len(items)
    while pos < count:
        key = items[pos]
        value = items[pos + 1]
        result[key] = value
        pos += 2
    self.stack[k:] = [result]
1135 dispatch[DICT] = load_dict
1136
Tim Petersd01c1e92003-01-30 15:41:46 +00001137 # INST and OBJ differ only in how they get a class object. It's not
1138 # only sensible to do the rest in a common routine, the two routines
1139 # previously diverged and grew different bugs.
1140 # klass is the class to instantiate, and k points to the topmost mark
1141 # object, following which are the arguments for klass.__init__.
def _instantiate(self, klass, k):
    """Create an instance of klass and push it on the stack.

    k is the stack index of the topmost mark object.  The items above it
    are the constructor arguments; they are removed from the stack along
    with the mark.
    """
    args = tuple(self.stack[k+1:])
    del self.stack[k:]
    instantiated = 0
    # With no arguments, a ClassType class and no __getinitargs__, the
    # instance is made without calling klass itself: an _EmptyClass
    # instance has its __class__ reassigned.
    if (not args and
            type(klass) is ClassType and
            not hasattr(klass, "__getinitargs__")):
        try:
            value = _EmptyClass()
            value.__class__ = klass
            instantiated = 1
        except RuntimeError:
            # In restricted execution, assignment to inst.__class__ is
            # prohibited
            pass
    if not instantiated:
        try:
            value = klass(*args)
        except TypeError, err:
            # Re-raise with the class name added to the message, keeping
            # the original traceback.
            raise TypeError, "in constructor for %s: %s" % (
                klass.__name__, str(err)), sys.exc_info()[2]
    self.append(value)
Tim Petersd01c1e92003-01-30 15:41:46 +00001164
def load_inst(self):
    """Read module and class name lines, then instantiate that class.

    The arguments are the stack items above the topmost mark.
    """
    module_name = self.readline()[:-1]
    class_name = self.readline()[:-1]
    klass = self.find_class(module_name, class_name)
    k = self.marker()
    self._instantiate(klass, k)
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001170 dispatch[INST] = load_inst
1171
def load_obj(self):
    """Instantiate the class object found just above the topmost mark."""
    # Stack is ... markobject classobject arg1 arg2 ...
    k = self.marker()
    stack = self.stack
    klass = stack.pop(k+1)
    self._instantiate(klass, k)
Tim Peters2344fae2001-01-15 00:50:52 +00001177 dispatch[OBJ] = load_obj
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001178
def load_newobj(self):
    """Pop args, then replace the class on top with cls.__new__(cls, *args)."""
    stack = self.stack
    args = stack.pop()
    cls = stack[-1]
    stack[-1] = cls.__new__(cls, *args)
Guido van Rossum3a41c612003-01-28 15:10:22 +00001184 dispatch[NEWOBJ] = load_newobj
1185
def load_global(self):
    """Read module and name lines and push find_class(module, name)."""
    module_name = self.readline()[:-1]
    attr_name = self.readline()[:-1]
    obj = self.find_class(module_name, attr_name)
    self.append(obj)
1191 dispatch[GLOBAL] = load_global
1192
def load_ext1(self):
    """Read a one-byte extension code and hand it to get_extension()."""
    raw = self.read(1)
    self.get_extension(ord(raw))
1196 dispatch[EXT1] = load_ext1
1197
def load_ext2(self):
    """Read a two-byte extension code and hand it to get_extension()."""
    raw = self.read(2)
    # pad to four bytes for mloads
    code = mloads('i' + raw + '\000\000')
    self.get_extension(code)
1201 dispatch[EXT2] = load_ext2
1202
def load_ext4(self):
    """Read a four-byte extension code and hand it to get_extension()."""
    raw = self.read(4)
    code = mloads('i' + raw)
    self.get_extension(code)
1206 dispatch[EXT4] = load_ext4
1207
def get_extension(self, code):
    """Push the object registered for the extension code.

    extension_cache is consulted first.  On a miss the key stored in
    inverted_registry is resolved with find_class() and the result is
    cached.  ValueError is raised when the code has no key.
    """
    # A fresh list serves as a "not cached" marker that no cached object
    # can be identical to.
    missing = []
    obj = extension_cache.get(code, missing)
    if obj is missing:
        key = inverted_registry.get(code)
        if not key:
            raise ValueError("unregistered extension code %d" % code)
        obj = self.find_class(*key)
        extension_cache[code] = obj
    self.append(obj)
1220
def find_class(self, module, name):
    """Import module and return its attribute called name."""
    # Subclasses may override this
    __import__(module)
    return getattr(sys.modules[module], name)
1227
def load_reduce(self):
    """Pop args, then replace the callable on top with func(*args).

    When args is None the deprecated func.__basicnew__() is called
    instead, and a DeprecationWarning is issued.
    """
    stack = self.stack
    args = stack.pop()
    func = stack[-1]
    if args is not None:
        result = func(*args)
    else:
        # A hack for Jim Fulton's ExtensionClass, now deprecated
        warnings.warn("__basicnew__ special case is deprecated",
                      DeprecationWarning)
        result = func.__basicnew__()
    stack[-1] = result
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001240 dispatch[REDUCE] = load_reduce
Guido van Rossuma48061a1995-01-10 00:31:14 +00001241
def load_pop(self):
    """Discard the top stack item."""
    stack = self.stack
    del stack[-1]
1244 dispatch[POP] = load_pop
Guido van Rossum7b5430f1995-03-04 22:25:21 +00001245
def load_pop_mark(self):
    """Discard the topmost mark and everything above it."""
    k = self.marker()
    stack = self.stack
    del stack[k:]
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001249 dispatch[POP_MARK] = load_pop_mark
Guido van Rossuma48061a1995-01-10 00:31:14 +00001250
def load_dup(self):
    """Push another reference to the top stack item."""
    top = self.stack[-1]
    self.append(top)
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001253 dispatch[DUP] = load_dup
Guido van Rossuma48061a1995-01-10 00:31:14 +00001254
def load_get(self):
    """Read a memo key from a line and push the memoized object."""
    key = self.readline()[:-1]
    self.append(self.memo[key])
1257 dispatch[GET] = load_get
Guido van Rossum78536471996-04-12 13:36:27 +00001258
def load_binget(self):
    """Read a one-byte memo index and push the memoized object."""
    index = ord(self.read(1))
    # memo keys are the repr of the index
    key = repr(index)
    self.append(self.memo[key])
1262 dispatch[BINGET] = load_binget
Guido van Rossum78536471996-04-12 13:36:27 +00001263
def load_long_binget(self):
    """Read a four-byte memo index and push the memoized object."""
    index = mloads('i' + self.read(4))
    # memo keys are the repr of the index
    key = repr(index)
    self.append(self.memo[key])
1267 dispatch[LONG_BINGET] = load_long_binget
Guido van Rossum78536471996-04-12 13:36:27 +00001268
def load_put(self):
    """Read a memo key from a line and memoize the top stack item."""
    key = self.readline()[:-1]
    self.memo[key] = self.stack[-1]
1271 dispatch[PUT] = load_put
Guido van Rossuma48061a1995-01-10 00:31:14 +00001272
def load_binput(self):
    """Read a one-byte memo index and memoize the top stack item."""
    index = ord(self.read(1))
    # memo keys are the repr of the index
    key = repr(index)
    self.memo[key] = self.stack[-1]
1276 dispatch[BINPUT] = load_binput
Guido van Rossuma48061a1995-01-10 00:31:14 +00001277
def load_long_binput(self):
    """Read a four-byte memo index and memoize the top stack item."""
    index = mloads('i' + self.read(4))
    # memo keys are the repr of the index
    key = repr(index)
    self.memo[key] = self.stack[-1]
1281 dispatch[LONG_BINPUT] = load_long_binput
Guido van Rossuma48061a1995-01-10 00:31:14 +00001282
def load_append(self):
    """Pop a value and append it to the list now on top of the stack."""
    stack = self.stack
    item = stack.pop()
    target = stack[-1]
    target.append(item)
1288 dispatch[APPEND] = load_append
Guido van Rossuma48061a1995-01-10 00:31:14 +00001289
def load_appends(self):
    """Extend the list below the topmost mark with the items above it.

    The mark and the items above it are then removed from the stack.
    """
    stack = self.stack
    k = self.marker()
    target = stack[k - 1]
    new_items = stack[k + 1:]
    target.extend(new_items)
    del stack[k:]
1296 dispatch[APPENDS] = load_appends
Tim Peters2344fae2001-01-15 00:50:52 +00001297
def load_setitem(self):
    """Pop a value and a key and store them in the dict now on top."""
    stack = self.stack
    # the value sits above its key
    item = stack.pop()
    key = stack.pop()
    target = stack[-1]
    target[key] = item
1304 dispatch[SETITEM] = load_setitem
Guido van Rossum0c891ce1995-03-14 15:09:05 +00001305
def load_setitems(self):
    """Store the key/value pairs above the topmost mark in the dict below it.

    The items above the mark are taken as alternating keys and values;
    the mark and those items are then removed from the stack.
    """
    stack = self.stack
    k = self.marker()
    target = stack[k - 1]
    pos = k + 1
    end = len(stack)
    while pos < end:
        target[stack[pos]] = stack[pos + 1]
        pos += 2

    del stack[k:]
1314 dispatch[SETITEMS] = load_setitems
Guido van Rossuma48061a1995-01-10 00:31:14 +00001315
def load_build(self):
    """Handle BUILD: apply a popped state object to the instance on top.

    If the instance has a (true) __setstate__ attribute, it is called
    with the state and nothing else happens.  Otherwise the state is
    either a dict-like object, or a 2-tuple (dict state, slot state):
    the dict state is merged into the instance's __dict__, and the slot
    state is applied attribute by attribute with setattr().
    """
    pending = self.stack.pop()
    target = self.stack[-1]

    restore = getattr(target, "__setstate__", None)
    if restore:
        restore(pending)
        return

    # A 2-tuple carries (dict state, slot state); anything else is
    # treated as dict state only.
    if isinstance(pending, tuple) and len(pending) == 2:
        attrs, slot_attrs = pending
    else:
        attrs, slot_attrs = pending, None

    if attrs:
        try:
            target.__dict__.update(attrs)
        except RuntimeError:
            # XXX In restricted execution, the instance's __dict__
            # is not accessible.  Use the old way of unpickling
            # the instance variables.  This is a semantic
            # difference when unpickling in restricted
            # vs. unrestricted modes.
            for name, val in attrs.items():
                setattr(target, name, val)

    if slot_attrs:
        for name, val in slot_attrs.items():
            setattr(target, name, val)
dispatch[BUILD] = load_build
Guido van Rossuma48061a1995-01-10 00:31:14 +00001342
def load_mark(self):
    """Handle MARK: push the self.mark sentinel via self.append.

    NOTE(review): self.append is presumably bound to the stack's
    append method elsewhere in the class -- confirm.
    """
    sentinel = self.mark
    self.append(sentinel)
dispatch[MARK] = load_mark
Guido van Rossuma48061a1995-01-10 00:31:14 +00001346
def load_stop(self):
    """Handle STOP: pop the top of the stack and raise _Stop with it.

    The popped object travels to the caller as the argument of the
    _Stop exception.
    """
    raise _Stop(self.stack.pop())
dispatch[STOP] = load_stop
Guido van Rossuma48061a1995-01-10 00:31:14 +00001351
Guido van Rossume467be61997-12-05 19:42:42 +00001352# Helper class for load_inst/load_obj
1353
1354class _EmptyClass:
1355 pass
Guido van Rossuma48061a1995-01-10 00:31:14 +00001356
Tim Peters91149822003-01-31 03:43:58 +00001357# Encode/decode longs in linear time.
1358
1359import binascii as _binascii
Guido van Rossumd6c9e632003-01-28 03:49:52 +00001360
1361def encode_long(x):
Tim Peters91149822003-01-31 03:43:58 +00001362 r"""Encode a long to a two's complement little-endian binary string.
Tim Peters4b23f2b2003-01-31 16:43:39 +00001363 Note that 0L is a special case, returning an empty string, to save a
1364 byte in the LONG1 pickling context.
1365
1366 >>> encode_long(0L)
1367 ''
Guido van Rossumd6c9e632003-01-28 03:49:52 +00001368 >>> encode_long(255L)
1369 '\xff\x00'
1370 >>> encode_long(32767L)
1371 '\xff\x7f'
1372 >>> encode_long(-256L)
1373 '\x00\xff'
1374 >>> encode_long(-32768L)
1375 '\x00\x80'
1376 >>> encode_long(-128L)
1377 '\x80'
1378 >>> encode_long(127L)
1379 '\x7f'
1380 >>>
1381 """
Tim Peters91149822003-01-31 03:43:58 +00001382
1383 if x == 0:
Tim Peters4b23f2b2003-01-31 16:43:39 +00001384 return ''
Tim Peters91149822003-01-31 03:43:58 +00001385 if x > 0:
1386 ashex = hex(x)
1387 assert ashex.startswith("0x")
1388 njunkchars = 2 + ashex.endswith('L')
1389 nibbles = len(ashex) - njunkchars
1390 if nibbles & 1:
1391 # need an even # of nibbles for unhexlify
1392 ashex = "0x0" + ashex[2:]
Tim Peters4b23f2b2003-01-31 16:43:39 +00001393 elif int(ashex[2], 16) >= 8:
Tim Peters91149822003-01-31 03:43:58 +00001394 # "looks negative", so need a byte of sign bits
1395 ashex = "0x00" + ashex[2:]
1396 else:
1397 # Build the 256's-complement: (1L << nbytes) + x. The trick is
1398 # to find the number of bytes in linear time (although that should
1399 # really be a constant-time task).
1400 ashex = hex(-x)
1401 assert ashex.startswith("0x")
1402 njunkchars = 2 + ashex.endswith('L')
1403 nibbles = len(ashex) - njunkchars
1404 if nibbles & 1:
1405 # need an even # of nibbles for unhexlify
1406 nibbles += 1
Tim Peters4b23f2b2003-01-31 16:43:39 +00001407 nbits = nibbles * 4
1408 x += 1L << nbits
Tim Peters91149822003-01-31 03:43:58 +00001409 assert x > 0
1410 ashex = hex(x)
Tim Peters4b23f2b2003-01-31 16:43:39 +00001411 if x >> (nbits - 1) == 0:
Tim Peters91149822003-01-31 03:43:58 +00001412 # "looks positive", so need a byte of sign bits
1413 ashex = "0xff" + x[2:]
1414
1415 if ashex.endswith('L'):
1416 ashex = ashex[2:-1]
1417 else:
1418 ashex = ashex[2:]
1419 assert len(ashex) & 1 == 0
1420 binary = _binascii.unhexlify(ashex)
1421 return binary[::-1]
Guido van Rossumd6c9e632003-01-28 03:49:52 +00001422
1423def decode_long(data):
1424 r"""Decode a long from a two's complement little-endian binary string.
Tim Peters4b23f2b2003-01-31 16:43:39 +00001425
1426 >>> decode_long('')
1427 0L
Guido van Rossumd6c9e632003-01-28 03:49:52 +00001428 >>> decode_long("\xff\x00")
1429 255L
1430 >>> decode_long("\xff\x7f")
1431 32767L
1432 >>> decode_long("\x00\xff")
1433 -256L
1434 >>> decode_long("\x00\x80")
1435 -32768L
1436 >>> decode_long("\x80")
1437 -128L
1438 >>> decode_long("\x7f")
1439 127L
1440 """
Tim Peters91149822003-01-31 03:43:58 +00001441
Tim Peters4b23f2b2003-01-31 16:43:39 +00001442 nbytes = len(data)
1443 if nbytes == 0:
1444 return 0L
Tim Peters91149822003-01-31 03:43:58 +00001445 ashex = _binascii.hexlify(data[::-1])
1446 n = long(ashex, 16)
1447 if data[-1] >= '\x80':
Tim Peters4b23f2b2003-01-31 16:43:39 +00001448 n -= 1L << (nbytes * 8)
Tim Peters91149822003-01-31 03:43:58 +00001449 return n
Guido van Rossumd6c9e632003-01-28 03:49:52 +00001450
Guido van Rossum0c891ce1995-03-14 15:09:05 +00001451# Shorthands
1452
Jeremy Hyltonabe2c622001-10-15 21:29:28 +00001453try:
1454 from cStringIO import StringIO
1455except ImportError:
1456 from StringIO import StringIO
Guido van Rossumc7c5e691996-07-22 22:26:07 +00001457
def dump(obj, file, proto=1):
    """Pickle obj to file using a fresh Pickler(file, proto)."""
    pickler = Pickler(file, proto)
    pickler.dump(obj)
Guido van Rossum0c891ce1995-03-14 15:09:05 +00001460
def dumps(obj, proto=1):
    """Return the pickle of obj as a string.

    The pickle is written into an in-memory StringIO buffer by a fresh
    Pickler(buffer, proto), and the buffer's contents are returned.
    """
    buf = StringIO()
    pickler = Pickler(buf, proto)
    pickler.dump(obj)
    return buf.getvalue()
Guido van Rossum0c891ce1995-03-14 15:09:05 +00001465
def load(file):
    """Return the result of Unpickler(file).load()."""
    unpickler = Unpickler(file)
    return unpickler.load()
Guido van Rossum0c891ce1995-03-14 15:09:05 +00001468
def loads(str):
    """Return the result of unpickling the string str.

    The string is wrapped in a StringIO object and read by a fresh
    Unpickler.
    """
    unpickler = Unpickler(StringIO(str))
    return unpickler.load()
Guido van Rossumd6c9e632003-01-28 03:49:52 +00001472
1473# Doctest
1474
1475def _test():
1476 import doctest
1477 return doctest.testmod()
1478
1479if __name__ == "__main__":
1480 _test()