blob: bf5c9513149fdbdcb729a43af31359e7dcee524b [file] [log] [blame]
"""Create portable serialized representations of Python objects.

See module copyreg for a mechanism for registering custom picklers.
See module pickletools source for extensive comments.

Classes:

    Pickler
    Unpickler

Functions:

    dump(object, file)
    dumps(object) -> bytes
    load(file) -> object
    loads(bytes) -> object

Misc variables:

    __version__
    format_version
    compatible_formats

"""
25
Guido van Rossum743d17e1998-09-15 20:25:57 +000026__version__ = "$Revision$" # Code version
Guido van Rossuma48061a1995-01-10 00:31:14 +000027
Guido van Rossum13257902007-06-07 23:15:56 +000028from types import FunctionType, BuiltinFunctionType
Alexandre Vassalottif7fa63d2008-05-11 08:55:36 +000029from copyreg import dispatch_table
30from copyreg import _extension_registry, _inverted_registry, _extension_cache
Guido van Rossumd3703791998-10-22 20:15:36 +000031import marshal
32import sys
33import struct
Skip Montanaro23bafc62001-02-18 03:10:09 +000034import re
Guido van Rossum2e6a4b32007-05-04 19:56:22 +000035import io
Walter Dörwald42748a82007-06-12 16:40:17 +000036import codecs
Guido van Rossuma48061a1995-01-10 00:31:14 +000037
# Public names of this module.  The upper-case constants defined further
# down are appended to this list once they exist.
__all__ = [
    "PickleError",
    "PicklingError",
    "UnpicklingError",
    "Pickler",
    "Unpickler",
    "dump",
    "dumps",
    "load",
    "loads",
]
40
# Tuple of the binary data types, handy as the second isinstance() argument.
bytes_types = (bytes, bytearray)

# The next two names are purely informational; no code uses them.
format_version = "3.0"      # file format version we write
compatible_formats = [      # format versions we can read
    "1.0",                  # original protocol 0
    "1.1",                  # protocol 0 with INST added
    "1.2",                  # original protocol 1
    "1.3",                  # protocol 1 with BINFLOAT added
    "2.0",                  # protocol 2
    "3.0",                  # protocol 3
]

# Highest protocol number we know how to read.
HIGHEST_PROTOCOL = 3

# Protocol written when the caller does not ask for one.  It may be lower
# than HIGHEST_PROTOCOL.  We intentionally write a protocol that Python 2.x
# cannot read; there are too many issues with that.
DEFAULT_PROTOCOL = 3

# struct.pack() is used for pickling but marshal.loads() for unpickling:
# struct.pack() is 40% faster than marshal.dumps(), but marshal.loads() is
# twice as fast as struct.unpack()!
mloads = marshal.loads
Guido van Rossum0c891ce1995-03-14 15:09:05 +000066
class PickleError(Exception):
    """Common base class of the pickling and unpickling exceptions."""
70
class PicklingError(PickleError):
    """Raised when an object that cannot be pickled is passed to dump()."""
77
class UnpicklingError(PickleError):
    """Raised when unpickling an object fails, e.g. on a security violation.

    Unpickling may raise other exceptions as well, including (but not
    necessarily limited to) AttributeError, EOFError, ImportError and
    IndexError.
    """
Guido van Rossum7849da81995-03-09 14:08:35 +000088
Tim Petersc0c12b52003-01-29 00:56:17 +000089# An instance of _Stop is raised by Unpickler.load_stop() in response to
90# the STOP opcode, passing the object that is the result of unpickling.
class _Stop(Exception):
    """Signal raised in response to the STOP opcode.

    The unpickled result travels with the exception in its ``value``
    attribute.
    """

    def __init__(self, value):
        # The finished object, handed back to whoever catches the signal.
        self.value = value
94
Guido van Rossum533dbcf2003-01-28 17:55:05 +000095# Jython has PyStringMap; it's a dict subclass with string keys
Jeremy Hylton2b9d0291998-05-27 22:38:22 +000096try:
97 from org.python.core import PyStringMap
98except ImportError:
99 PyStringMap = None
100
# Pickle opcodes.  pickletools.py documents them extensively and groups
# them by purpose; the listing below is in kind-of alphabetical order of
# the 1-character pickle code.

MARK            = b'('  # push special markobject on stack
STOP            = b'.'  # every pickle ends with STOP
POP             = b'0'  # discard topmost stack item
POP_MARK        = b'1'  # discard stack top through topmost markobject
DUP             = b'2'  # duplicate top stack item
FLOAT           = b'F'  # push float object; decimal string argument
INT             = b'I'  # push integer or bool; decimal string argument
BININT          = b'J'  # push four-byte signed int
BININT1         = b'K'  # push 1-byte unsigned int
LONG            = b'L'  # push long; decimal string argument
BININT2         = b'M'  # push 2-byte unsigned int
NONE            = b'N'  # push None
PERSID          = b'P'  # push persistent object; id is taken from string arg
BINPERSID       = b'Q'  # push persistent object; id is taken from stack
REDUCE          = b'R'  # apply callable to argtuple, both on stack
STRING          = b'S'  # push string; NL-terminated string argument
BINSTRING       = b'T'  # push string; counted binary string argument
SHORT_BINSTRING = b'U'  # push string; counted binary string arg < 256 bytes
UNICODE         = b'V'  # push Unicode string; raw-unicode-escaped argument
BINUNICODE      = b'X'  # push Unicode string; counted UTF-8 string argument
APPEND          = b'a'  # append stack top to list below it
BUILD           = b'b'  # call __setstate__ or __dict__.update()
GLOBAL          = b'c'  # push self.find_class(modname, name); 2 string args
DICT            = b'd'  # build a dict from stack items
EMPTY_DICT      = b'}'  # push empty dict
APPENDS         = b'e'  # extend list on stack by topmost stack slice
GET             = b'g'  # push item from memo on stack; index is string arg
BINGET          = b'h'  # push item from memo on stack; index is 1-byte arg
INST            = b'i'  # build & push class instance
LONG_BINGET     = b'j'  # push item from memo on stack; index is 4-byte arg
LIST            = b'l'  # build list from topmost stack items
EMPTY_LIST      = b']'  # push empty list
OBJ             = b'o'  # build & push class instance
PUT             = b'p'  # store stack top in memo; index is string arg
BINPUT          = b'q'  # store stack top in memo; index is 1-byte arg
LONG_BINPUT     = b'r'  # store stack top in memo; index is 4-byte arg
SETITEM         = b's'  # add key+value pair to dict
TUPLE           = b't'  # build tuple from topmost stack items
EMPTY_TUPLE     = b')'  # push empty tuple
SETITEMS        = b'u'  # modify dict by adding topmost key+value pairs
BINFLOAT        = b'G'  # push float; arg is 8-byte float encoding

# TRUE and FALSE are complete INT records rather than opcodes of their
# own; see the INT docs in pickletools.py.
TRUE            = b'I01\n'
FALSE           = b'I00\n'

# Protocol 2

PROTO           = b'\x80'  # identify pickle protocol
NEWOBJ          = b'\x81'  # build object by applying cls.__new__ to argtuple
EXT1            = b'\x82'  # push object from extension registry; 1-byte index
EXT2            = b'\x83'  # push object from extension registry; 2-byte index
EXT4            = b'\x84'  # push object from extension registry; 4-byte index
TUPLE1          = b'\x85'  # build 1-tuple from stack top
TUPLE2          = b'\x86'  # build 2-tuple from two topmost stack items
TUPLE3          = b'\x87'  # build 3-tuple from three topmost stack items
NEWTRUE         = b'\x88'  # push True
NEWFALSE        = b'\x89'  # push False
LONG1           = b'\x8a'  # push long from < 256 bytes
LONG4           = b'\x8b'  # push really big long

# Indexed by tuple length (0 through 3): the opcode that builds a tuple of
# that size directly.
_tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3]

# Protocol 3 (Python 3.x)

BINBYTES        = b'B'  # push bytes; counted binary string argument
SHORT_BINBYTES  = b'C'  # push bytes; counted binary string arg < 256 bytes
Guido van Rossuma48061a1995-01-10 00:31:14 +0000171
# Also export every module-level name spelled in upper case so far.
__all__ += [name for name in dir() if re.match("[A-Z][A-Z0-9_]+$", name)]
173
Guido van Rossum1be31752003-01-28 15:19:53 +0000174
175# Pickling machinery
176
Guido van Rossuma48061a1995-01-10 00:31:14 +0000177class Pickler:
178
def __init__(self, file, protocol=None):
    """Prepare a pickler that writes a pickle data stream to *file*.

    All protocols read and write bytes, so *file* must provide a
    write() method that accepts a single bytes argument.  Only that
    method is used; any object offering it will do.

    The optional *protocol* argument selects the pickle protocol:

    * None (the default) selects DEFAULT_PROTOCOL;
    * a negative number selects HIGHEST_PROTOCOL;
    * otherwise it must lie between 0 and HIGHEST_PROTOCOL inclusive,
      or ValueError is raised.

    The higher the protocol used, the more recent the version of
    Python needed to read the pickle produced.
    """
    if protocol is None:
        protocol = DEFAULT_PROTOCOL

    if protocol < 0:
        # Any negative value is shorthand for "the newest protocol".
        protocol = HIGHEST_PROTOCOL
    elif not 0 <= protocol <= HIGHEST_PROTOCOL:
        raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL)

    self.write = file.write
    # Maps id(obj) to (memo key, obj); filled in by memoize().
    self.memo = {}
    self.proto = int(protocol)
    # Every protocol except 0 uses the binary opcodes.
    self.bin = protocol >= 1
    # When true, memoize() records nothing.
    self.fast = 0
Guido van Rossuma48061a1995-01-10 00:31:14 +0000212
def clear_memo(self):
    """Empty the pickler's memo.

    The memo remembers which objects the pickler has already seen, so
    that shared or recursive objects are pickled by reference rather
    than by value.  Clearing it is useful when a pickler is re-used.
    """
    # Emptied in place, so the same dictionary object stays attached.
    self.memo.clear()
223
def dump(self, obj):
    """Write the pickled representation of *obj* to the output file.

    For protocol 2 and above the stream starts with a PROTO opcode
    naming the protocol.  The pickled object follows, and a STOP opcode
    ends the stream.
    """
    if self.proto >= 2:
        self.write(PROTO + bytes((self.proto,)))
    self.save(obj)
    self.write(STOP)
Guido van Rossuma48061a1995-01-10 00:31:14 +0000230
def memoize(self, obj):
    """Record *obj* in the memo and write the matching PUT opcode.

    The memo maps id(obj) to a 2-tuple of the memo key and the object
    itself.  The key is written to the pickle and becomes the key in
    the Unpickler's memo.  The object is kept in the tuple so that
    transient objects stay alive while pickling.

    Using the current memo length as the key is only a convention; the
    one requirement is that keys be unique.  This scheme lets the
    Unpickler memo be a plain (but growable) array indexed by key.

    Nothing is recorded or written when self.fast is true.
    """
    if not self.fast:
        ident = id(obj)
        assert ident not in self.memo
        key = len(self.memo)
        self.write(self.put(key))
        self.memo[ident] = (key, obj)
Jeremy Hylton3422c992003-01-24 19:29:52 +0000252
Tim Petersbb38e302003-01-27 21:25:41 +0000253 # Return a PUT (BINPUT, LONG_BINPUT) opcode string, with argument i.
def put(self, i, pack=struct.pack):
    """Return the opcode bytes that store the stack top under memo key *i*.

    Text mode gives PUT with a decimal argument.  Binary mode gives
    BINPUT with a 1-byte argument when i < 256, otherwise LONG_BINPUT
    with a 4-byte little-endian argument.
    """
    if not self.bin:
        return PUT + repr(i).encode("ascii") + b'\n'
    if i < 256:
        return BINPUT + bytes([i])
    return LONG_BINPUT + pack("<i", i)
Guido van Rossuma48061a1995-01-10 00:31:14 +0000262
Tim Petersbb38e302003-01-27 21:25:41 +0000263 # Return a GET (BINGET, LONG_BINGET) opcode string, with argument i.
def get(self, i, pack=struct.pack):
    """Return the opcode bytes that push the memo item with key *i*.

    Text mode gives GET with a decimal argument.  Binary mode gives
    BINGET with a 1-byte argument when i < 256, otherwise LONG_BINGET
    with a 4-byte little-endian argument.
    """
    if not self.bin:
        return GET + repr(i).encode("ascii") + b'\n'
    if i < 256:
        return BINGET + bytes([i])
    return LONG_BINGET + pack("<i", i)
Tim Peters2344fae2001-01-15 00:50:52 +0000272
def save(self, obj):
    """Write the pickled representation of *obj*, whatever its type.

    The strategies below are tried in order; the first that applies
    wins:

    1. a persistent id reported by self.persistent_id();
    2. a memo hit, written as a GET of the memo key;
    3. a save_* method registered for the exact type in self.dispatch;
    4. save_global(), when type(obj) is a subclass of type;
    5. a reducer from copyreg.dispatch_table, else obj.__reduce_ex__,
       else obj.__reduce__.

    A reducer may return a string, meaning "save as global", or a tuple
    of two to five items that is handed to save_reduce().

    Raises PicklingError when no reducer is available or the reducer
    returns a value of the wrong type or size.
    """
    # Check for persistent id (defined by a subclass).  Only None means
    # "no persistent id"; falsy ids such as 0 or "" are legitimate.
    pid = self.persistent_id(obj)
    if pid is not None:
        self.save_pers(pid)
        return

    # Check the memo
    x = self.memo.get(id(obj))
    if x is not None:
        self.write(self.get(x[0]))
        return

    # Check the type dispatch table
    t = type(obj)
    f = self.dispatch.get(t)
    if f is not None:
        f(self, obj)  # Call unbound method with explicit self
        return

    # Check for a class with a custom metaclass; treat as regular class
    try:
        issc = issubclass(t, type)
    except TypeError:  # t is not a class (old Boost; see SF #502085)
        issc = False
    if issc:
        self.save_global(obj)
        return

    # Check copyreg.dispatch_table
    reduce = dispatch_table.get(t)
    if reduce:
        rv = reduce(obj)
    else:
        # Check for a __reduce_ex__ method, fall back to __reduce__
        reduce = getattr(obj, "__reduce_ex__", None)
        if reduce:
            rv = reduce(self.proto)
        else:
            reduce = getattr(obj, "__reduce__", None)
            if reduce:
                rv = reduce()
            else:
                raise PicklingError("Can't pickle %r object: %r" %
                                    (t.__name__, obj))

    # Check for string returned by reduce(), meaning "save as global"
    if isinstance(rv, str):
        self.save_global(obj, rv)
        return

    # Assert that reduce() returned a tuple
    if not isinstance(rv, tuple):
        raise PicklingError("%s must return string or tuple" % reduce)

    # Assert that it returned an appropriately sized tuple
    l = len(rv)
    if not (2 <= l <= 5):
        raise PicklingError("Tuple returned by %s must have "
                            "two to five elements" % reduce)

    # Save the reduce() output and finally memoize the object
    self.save_reduce(obj=obj, *rv)
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000336
def persistent_id(self, obj):
    """Return the persistent id of *obj*; the default is always None.

    This method exists so that a subclass can override it.
    """
    return None
340
def save_pers(self, pid):
    """Write a reference to the persistent id *pid*.

    Text mode writes PERSID followed by str(pid) encoded as ASCII.
    Binary mode pickles *pid* itself and then writes BINPERSID, which
    takes the id from the stack.
    """
    if not self.bin:
        self.write(PERSID + str(pid).encode("ascii") + b'\n')
        return
    self.save(pid)
    self.write(BINPERSID)
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000348
def save_reduce(self, func, args, state=None,
                listitems=None, dictitems=None, obj=None):
    """Pickle the pieces of a reduce tuple.  Also called by subclasses.

    func      -- callable that rebuilds the object
    args      -- tuple of arguments for func
    state     -- optional; pickled and followed by BUILD
    listitems -- optional; passed to self._batch_appends()
    dictitems -- optional; passed to self._batch_setitems()
    obj       -- optional; the object being pickled, memoized once the
                 construction opcodes have been written

    Raises PicklingError when args is not a tuple, when func is not
    callable, or when the __newobj__ special case is misused.
    """
    # args must be a tuple
    if not isinstance(args, tuple):
        raise PicklingError("args from reduce() should be a tuple")

    # func must be callable
    if not hasattr(func, '__call__'):
        raise PicklingError("func from reduce should be callable")

    save = self.save
    write = self.write

    # Protocol 2 special case: if func's name is __newobj__, use NEWOBJ.
    use_newobj = (self.proto >= 2 and
                  getattr(func, "__name__", "") == "__newobj__")
    if not use_newobj:
        save(func)
        save(args)
        write(REDUCE)
    else:
        # A __reduce__ implementation can direct protocol 2 to use the
        # more efficient NEWOBJ opcode while still letting protocols 0
        # and 1 work normally.  For that, the function returned by
        # __reduce__ should be called __newobj__ and its first argument
        # should be a new-style class.  __newobj__ should be implemented
        # as follows, although pickle has no way to verify this:
        #
        #     def __newobj__(cls, *args):
        #         return cls.__new__(cls, *args)
        #
        # Protocols 0 and 1 pickle a reference to __newobj__.  Protocol 2
        # (and above) pickles a reference to cls, the remaining args
        # tuple and the NEWOBJ code, which calls cls.__new__(cls, *args)
        # at unpickling time (see load_newobj).  If __reduce__ returns a
        # three-tuple, the state from the third item is pickled
        # regardless of the protocol, calling __setstate__ at unpickling
        # time (see load_build).
        #
        # No standard __newobj__ implementation exists; you have to
        # provide your own.  This enforces compatibility with Python 2.2
        # (pickles written using protocol 0 or 1 in Python 2.3 should be
        # unpicklable by Python 2.2).
        cls = args[0]
        if not hasattr(cls, "__new__"):
            raise PicklingError(
                "args[0] from __newobj__ args has no __new__")
        if obj is not None and cls is not obj.__class__:
            raise PicklingError(
                "args[0] from __newobj__ args has the wrong class")
        save(cls)
        save(args[1:])
        write(NEWOBJ)

    if obj is not None:
        self.memoize(obj)

    # More special cases (these work with older protocols as well):
    # when __reduce__ returns a tuple with 4 or 5 items, the 4th and 5th
    # items should be iterators that provide list items and dict items
    # (as (key, value) tuples), or None.
    if listitems is not None:
        self._batch_appends(listitems)

    if dictitems is not None:
        self._batch_setitems(dictitems)

    if state is not None:
        save(state)
        write(BUILD)
425
Guido van Rossumbc64e222003-01-28 16:34:19 +0000426 # Methods below this point are dispatched through the dispatch table
427
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000428 dispatch = {}
429
def save_none(self, obj):
    """Write the NONE opcode; *obj* itself is not inspected."""
    self.write(NONE)
Guido van Rossum13257902007-06-07 23:15:56 +0000432 dispatch[type(None)] = save_none
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000433
def save_bool(self, obj):
    """Write the record for a bool.

    Protocol 2 and above use NEWTRUE / NEWFALSE.  Older protocols use
    the TRUE / FALSE records.
    """
    if self.proto >= 2:
        record = NEWTRUE if obj else NEWFALSE
    else:
        record = TRUE if obj else FALSE
    self.write(record)
Guido van Rossum77f6a652002-04-03 22:41:51 +0000439 dispatch[bool] = save_bool
440
def save_int(self, obj, pack=struct.pack):
    """Write an integer with the INT family of opcodes.

    In binary mode the shortest fitting form is used: BININT1 for
    0..0xff, BININT2 for 0..0xffff, BININT for anything that fits a
    signed 4-byte integer.  Text mode, and integers too big for 4
    bytes, use INT with a decimal argument.

    This method is not registered in the dispatch table; save_long
    handles int.
    """
    if self.bin:
        # One- and two-byte unsigned forms first.
        if obj >= 0:
            if obj <= 0xff:
                self.write(BININT1 + bytes([obj]))
                return
            if obj <= 0xffff:
                self.write(BININT2 + bytes([obj & 0xff, obj >> 8]))
                return
        # Python's right shift sign-extends, so everything above bit 30
        # collapses to 0 or -1 exactly when the value fits a signed
        # 4-byte two's-complement integer.
        if (obj >> 31) in (0, -1):
            self.write(BININT + pack("<i", obj))
            return
    # Text pickle, or int too big to fit in signed 4-byte format.
    self.write(INT + repr(obj).encode("ascii") + b'\n')
Guido van Rossumddefaf32007-01-14 03:31:43 +0000463 # XXX save_int is merged into save_long
Guido van Rossum13257902007-06-07 23:15:56 +0000464 # dispatch[int] = save_int
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000465
def save_long(self, obj, pack=struct.pack):
    """Write an integer of any size.

    In binary mode, values that fit use BININT1 (0..0xff), BININT2
    (0..0xffff) or BININT (signed 4-byte).  Anything else is written
    with LONG1 / LONG4 and encode_long() under protocol 2 and above,
    and with LONG and a decimal argument under older protocols.
    """
    if self.bin:
        # One- and two-byte unsigned forms first.
        if obj >= 0:
            if obj <= 0xff:
                self.write(BININT1 + bytes([obj]))
                return
            if obj <= 0xffff:
                self.write(BININT2 + bytes([obj & 0xff, obj >> 8]))
                return
        # Python's right shift sign-extends, so everything above bit 30
        # collapses to 0 or -1 exactly when the value fits a signed
        # 4-byte two's-complement integer.
        if (obj >> 31) in (0, -1):
            self.write(BININT + pack("<i", obj))
            return

    if self.proto >= 2:
        payload = encode_long(obj)
        size = len(payload)
        if size < 256:
            header = LONG1 + bytes([size])
        else:
            header = LONG4 + pack("<i", size)
        self.write(header + payload)
        return

    self.write(LONG + repr(obj).encode("ascii") + b'\n')
Guido van Rossum13257902007-06-07 23:15:56 +0000495 dispatch[int] = save_long
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000496
def save_float(self, obj, pack=struct.pack):
    """Write a float.

    Binary mode writes BINFLOAT with the value packed as '>d'.  Text
    mode writes FLOAT followed by repr(obj).
    """
    if not self.bin:
        self.write(FLOAT + repr(obj).encode("ascii") + b'\n')
        return
    self.write(BINFLOAT + pack('>d', obj))
Guido van Rossum13257902007-06-07 23:15:56 +0000502 dispatch[float] = save_float
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000503
def save_bytes(self, obj, pack=struct.pack):
    """Write a bytes object.

    Below protocol 3 the value is written through save_reduce() as a
    call of bytes() on the list of byte values.  Protocol 3 writes
    SHORT_BINBYTES (length < 256) or BINBYTES (4-byte length) followed
    by the raw data, then memoizes the object.
    """
    if self.proto < 3:
        self.save_reduce(bytes, (list(obj),))
        return

    size = len(obj)
    if size < 256:
        header = SHORT_BINBYTES + bytes([size])
    else:
        header = BINBYTES + pack("<i", size)
    self.write(header + bytes(obj))
    self.memoize(obj)
Guido van Rossumf4169812008-03-17 22:56:06 +0000514 dispatch[bytes] = save_bytes
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000515
def save_str(self, obj, pack=struct.pack):
    """Write a str object and memoize it.

    Binary mode writes BINUNICODE, a 4-byte little-endian length and
    the UTF-8 encoded text.  Text mode writes UNICODE followed by the
    raw-unicode-escape encoded text and a newline.

    In text mode backslashes and newlines are replaced by \\u005c and
    \\u000a before encoding.  The escaped text is kept in a separate
    variable so that the object memoized afterwards is the caller's
    original string, not the temporary escaped copy.
    """
    if self.bin:
        encoded = obj.encode('utf-8')
        n = len(encoded)
        self.write(BINUNICODE + pack("<i", n) + encoded)
    else:
        # Escape what the line-based UNICODE record cannot carry, but
        # leave `obj` bound to the original for memoize() below.
        escaped = obj.replace("\\", "\\u005c")
        escaped = escaped.replace("\n", "\\u000a")
        self.write(UNICODE + bytes(escaped.encode('raw-unicode-escape')) +
                   b'\n')
    self.memoize(obj)
Guido van Rossumf4169812008-03-17 22:56:06 +0000527 dispatch[str] = save_str
Tim Peters658cba62001-02-09 20:06:00 +0000528
def save_tuple(self, obj):
    """Write a tuple.

    An empty tuple is EMPTY_TUPLE, or MARK + TUPLE under protocol 0.
    Under protocol 2 and above, tuples of up to three items are built
    by the opcode found in _tuplesize2code.  Every other tuple is
    written as MARK, the items, then TUPLE.  Empty tuples are not
    memoized; all others are.
    """
    write = self.write
    proto = self.proto

    size = len(obj)
    if size == 0:
        write(EMPTY_TUPLE if proto else MARK + TUPLE)
        return

    save = self.save
    memo = self.memo

    # Short tuples have dedicated opcodes from protocol 2 on and need no
    # MARK.  Everything else is bracketed by MARK ... TUPLE.
    short_form = size <= 3 and proto >= 2
    if not short_form:
        write(MARK)
    for item in obj:
        save(item)

    ident = id(obj)
    if ident in memo:
        # Subtle.  The tuple was not in the memo when save_tuple() was
        # entered, so saving its items must have saved the tuple itself:
        # the tuple is recursive.  The proper action now is to throw
        # away everything put on the stack and simply GET the tuple,
        # which is already constructed.  This check could be made inside
        # the loop above, but recursive tuples are a rare thing.
        fetch = self.get(memo[ident][0])
        if short_form:
            write(POP * size + fetch)
        elif proto:
            write(POP_MARK + fetch)
        else:  # proto 0 -- POP_MARK not available
            write(POP * (size + 1) + fetch)
        return

    # No recursion.
    write(_tuplesize2code[size] if short_form else TUPLE)
    self.memoize(obj)
Jeremy Hylton3422c992003-01-24 19:29:52 +0000579
Guido van Rossum13257902007-06-07 23:15:56 +0000580 dispatch[tuple] = save_tuple
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000581
Tim Petersa6ae9a22003-01-28 16:58:41 +0000582 # save_empty_tuple() isn't used by anything in Python 2.3. However, I
583 # found a Pickler subclass in Zope3 that calls it, so it's not harmless
584 # to remove it.
Guido van Rossum3a41c612003-01-28 15:10:22 +0000585 def save_empty_tuple(self, obj):
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000586 self.write(EMPTY_TUPLE)
587
Guido van Rossum3a41c612003-01-28 15:10:22 +0000588 def save_list(self, obj):
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000589 write = self.write
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000590
Tim Petersc32d8242001-04-10 02:48:53 +0000591 if self.bin:
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000592 write(EMPTY_LIST)
Guido van Rossum25cb7df2003-01-31 18:53:21 +0000593 else: # proto 0 -- can't use EMPTY_LIST
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000594 write(MARK + LIST)
Guido van Rossum25cb7df2003-01-31 18:53:21 +0000595
596 self.memoize(obj)
597 self._batch_appends(iter(obj))
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000598
Guido van Rossum13257902007-06-07 23:15:56 +0000599 dispatch[list] = save_list
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000600
Guido van Rossum25cb7df2003-01-31 18:53:21 +0000601 _BATCHSIZE = 1000
602
603 def _batch_appends(self, items):
604 # Helper to batch up APPENDS sequences
605 save = self.save
606 write = self.write
607
608 if not self.bin:
609 for x in items:
610 save(x)
611 write(APPEND)
612 return
613
Guido van Rossum805365e2007-05-07 22:24:25 +0000614 r = range(self._BATCHSIZE)
Guido van Rossum25cb7df2003-01-31 18:53:21 +0000615 while items is not None:
616 tmp = []
617 for i in r:
618 try:
Georg Brandla18af4e2007-04-21 15:47:16 +0000619 x = next(items)
Guido van Rossum5aac4e62003-02-06 22:57:00 +0000620 tmp.append(x)
Guido van Rossum25cb7df2003-01-31 18:53:21 +0000621 except StopIteration:
622 items = None
623 break
624 n = len(tmp)
625 if n > 1:
626 write(MARK)
627 for x in tmp:
628 save(x)
629 write(APPENDS)
630 elif n:
631 save(tmp[0])
632 write(APPEND)
633 # else tmp is empty, and we're done
634
Guido van Rossum3a41c612003-01-28 15:10:22 +0000635 def save_dict(self, obj):
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000636 write = self.write
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000637
Tim Petersc32d8242001-04-10 02:48:53 +0000638 if self.bin:
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000639 write(EMPTY_DICT)
Guido van Rossum25cb7df2003-01-31 18:53:21 +0000640 else: # proto 0 -- can't use EMPTY_DICT
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000641 write(MARK + DICT)
642
Guido van Rossum25cb7df2003-01-31 18:53:21 +0000643 self.memoize(obj)
Guido van Rossumcc2b0162007-02-11 06:12:03 +0000644 self._batch_setitems(iter(obj.items()))
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000645
Guido van Rossum13257902007-06-07 23:15:56 +0000646 dispatch[dict] = save_dict
647 if PyStringMap is not None:
Jeremy Hylton2b9d0291998-05-27 22:38:22 +0000648 dispatch[PyStringMap] = save_dict
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000649
Guido van Rossum25cb7df2003-01-31 18:53:21 +0000650 def _batch_setitems(self, items):
651 # Helper to batch up SETITEMS sequences; proto >= 1 only
652 save = self.save
653 write = self.write
654
655 if not self.bin:
656 for k, v in items:
657 save(k)
658 save(v)
659 write(SETITEM)
660 return
661
Guido van Rossum805365e2007-05-07 22:24:25 +0000662 r = range(self._BATCHSIZE)
Guido van Rossum25cb7df2003-01-31 18:53:21 +0000663 while items is not None:
664 tmp = []
665 for i in r:
666 try:
Georg Brandla18af4e2007-04-21 15:47:16 +0000667 tmp.append(next(items))
Guido van Rossum25cb7df2003-01-31 18:53:21 +0000668 except StopIteration:
669 items = None
670 break
671 n = len(tmp)
672 if n > 1:
673 write(MARK)
674 for k, v in tmp:
675 save(k)
676 save(v)
677 write(SETITEMS)
678 elif n:
679 k, v = tmp[0]
680 save(k)
681 save(v)
682 write(SETITEM)
683 # else tmp is empty, and we're done
684
Guido van Rossum255f3ee2003-01-29 06:14:11 +0000685 def save_global(self, obj, name=None, pack=struct.pack):
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000686 write = self.write
687 memo = self.memo
688
Tim Petersc32d8242001-04-10 02:48:53 +0000689 if name is None:
Guido van Rossum3a41c612003-01-28 15:10:22 +0000690 name = obj.__name__
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000691
Jeremy Hylton4f0dcc92003-01-31 18:33:18 +0000692 module = getattr(obj, "__module__", None)
693 if module is None:
Guido van Rossum3a41c612003-01-28 15:10:22 +0000694 module = whichmodule(obj, name)
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000695
Guido van Rossumb0a98e92001-08-17 18:49:52 +0000696 try:
697 __import__(module)
698 mod = sys.modules[module]
699 klass = getattr(mod, name)
700 except (ImportError, KeyError, AttributeError):
701 raise PicklingError(
702 "Can't pickle %r: it's not found as %s.%s" %
Guido van Rossum3a41c612003-01-28 15:10:22 +0000703 (obj, module, name))
Guido van Rossumb0a98e92001-08-17 18:49:52 +0000704 else:
Guido van Rossum3a41c612003-01-28 15:10:22 +0000705 if klass is not obj:
Guido van Rossumb0a98e92001-08-17 18:49:52 +0000706 raise PicklingError(
707 "Can't pickle %r: it's not the same object as %s.%s" %
Guido van Rossum3a41c612003-01-28 15:10:22 +0000708 (obj, module, name))
Guido van Rossumb0a98e92001-08-17 18:49:52 +0000709
Guido van Rossum255f3ee2003-01-29 06:14:11 +0000710 if self.proto >= 2:
Guido van Rossumd4b920c2003-02-04 01:54:49 +0000711 code = _extension_registry.get((module, name))
Guido van Rossum255f3ee2003-01-29 06:14:11 +0000712 if code:
713 assert code > 0
714 if code <= 0xff:
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000715 write(EXT1 + bytes([code]))
Guido van Rossum255f3ee2003-01-29 06:14:11 +0000716 elif code <= 0xffff:
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000717 write(EXT2 + bytes([code&0xff, code>>8]))
Guido van Rossum255f3ee2003-01-29 06:14:11 +0000718 else:
719 write(EXT4 + pack("<i", code))
720 return
721
Guido van Rossum39478e82007-08-27 17:23:59 +0000722 write(GLOBAL + bytes(module, "utf-8") + b'\n' +
723 bytes(name, "utf-8") + b'\n')
Guido van Rossum3a41c612003-01-28 15:10:22 +0000724 self.memoize(obj)
Tim Peters3b769832003-01-28 03:51:36 +0000725
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000726 dispatch[FunctionType] = save_global
727 dispatch[BuiltinFunctionType] = save_global
Guido van Rossum13257902007-06-07 23:15:56 +0000728 dispatch[type] = save_global
Guido van Rossum0c891ce1995-03-14 15:09:05 +0000729
Guido van Rossum1be31752003-01-28 15:19:53 +0000730# Pickling helpers
Guido van Rossuma48061a1995-01-10 00:31:14 +0000731
Guido van Rossum5ed5c4c1997-09-03 00:23:54 +0000732def _keep_alive(x, memo):
733 """Keeps a reference to the object x in the memo.
734
735 Because we remember objects by their id, we have
736 to assure that possibly temporary objects are kept
737 alive by referencing them.
738 We store a reference at the id of the memo, which should
739 normally not be used unless someone tries to deepcopy
740 the memo itself...
741 """
742 try:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000743 memo[id(memo)].append(x)
Guido van Rossum5ed5c4c1997-09-03 00:23:54 +0000744 except KeyError:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000745 # aha, this is the first one :-)
746 memo[id(memo)]=[x]
Guido van Rossum5ed5c4c1997-09-03 00:23:54 +0000747
748
Tim Petersc0c12b52003-01-29 00:56:17 +0000749# A cache for whichmodule(), mapping a function object to the name of
750# the module in which the function was found.
751
Jeremy Hyltonf0cfdf72002-09-19 23:00:12 +0000752classmap = {} # called classmap for backwards compatibility
Guido van Rossuma48061a1995-01-10 00:31:14 +0000753
def whichmodule(func, funcname):
    """Return the name of the module in which *func* is defined.

    func.__module__ is used when it is set.  Otherwise the classmap
    cache is consulted, and failing that sys.modules is searched for a
    module (other than '__main__') whose attribute *funcname* is *func*.
    The search result, or '__main__' if nothing matched, is cached in
    classmap and returned.
    """
    declared = getattr(func, "__module__", None)
    if declared is not None:
        return declared
    if func in classmap:
        return classmap[func]

    found = '__main__'
    # Iterate over a snapshot of sys.modules.
    for modname, module in list(sys.modules.items()):
        if module is None:
            continue    # skip dummy package entries
        if modname == '__main__':
            continue
        if getattr(module, funcname, None) is func:
            found = modname
            break
    classmap[func] = found
    return found
Guido van Rossuma48061a1995-01-10 00:31:14 +0000778
779
Guido van Rossum1be31752003-01-28 15:19:53 +0000780# Unpickling machinery
781
Guido van Rossuma48061a1995-01-10 00:31:14 +0000782class Unpickler:
783
Guido van Rossumf4169812008-03-17 22:56:06 +0000784 def __init__(self, file, *, encoding="ASCII", errors="strict"):
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000785 """This takes a binary file for reading a pickle data stream.
Raymond Hettingeraef22fb2002-05-29 16:18:42 +0000786
Tim Peters5bd2a792003-02-01 16:45:06 +0000787 The protocol version of the pickle is detected automatically, so no
788 proto argument is needed.
Raymond Hettingeraef22fb2002-05-29 16:18:42 +0000789
Guido van Rossumfeea0782007-10-10 18:00:50 +0000790 The file-like object must have two methods, a read() method
791 that takes an integer argument, and a readline() method that
792 requires no arguments. Both methods should return bytes.
793 Thus file-like object can be a binary file object opened for
794 reading, a BytesIO object, or any other custom object that
795 meets this interface.
Guido van Rossumf4169812008-03-17 22:56:06 +0000796
797 Optional keyword arguments are encoding and errors, which are
798 used to decode 8-bit string instances pickled by Python 2.x.
799 These default to 'ASCII' and 'strict', respectively.
Raymond Hettingeraef22fb2002-05-29 16:18:42 +0000800 """
Guido van Rossumfeea0782007-10-10 18:00:50 +0000801 self.readline = file.readline
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000802 self.read = file.read
803 self.memo = {}
Guido van Rossumf4169812008-03-17 22:56:06 +0000804 self.encoding = encoding
805 self.errors = errors
Guido van Rossuma48061a1995-01-10 00:31:14 +0000806
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000807 def load(self):
Guido van Rossum3a41c612003-01-28 15:10:22 +0000808 """Read a pickled object representation from the open file.
Raymond Hettingeraef22fb2002-05-29 16:18:42 +0000809
Guido van Rossum3a41c612003-01-28 15:10:22 +0000810 Return the reconstituted object hierarchy specified in the file.
Raymond Hettingeraef22fb2002-05-29 16:18:42 +0000811 """
Jeremy Hylton20747fa2001-11-09 16:15:04 +0000812 self.mark = object() # any new unique object
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000813 self.stack = []
814 self.append = self.stack.append
815 read = self.read
816 dispatch = self.dispatch
817 try:
818 while 1:
819 key = read(1)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000820 if not key:
821 raise EOFError
Guido van Rossum98297ee2007-11-06 21:34:58 +0000822 assert isinstance(key, bytes_types)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000823 dispatch[key[0]](self)
Guido van Rossumb940e112007-01-10 16:19:56 +0000824 except _Stop as stopinst:
Guido van Rossumff871742000-12-13 18:11:56 +0000825 return stopinst.value
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000826
Tim Petersc23d18a2003-01-28 01:41:51 +0000827 # Return largest index k such that self.stack[k] is self.mark.
828 # If the stack doesn't contain a mark, eventually raises IndexError.
829 # This could be sped by maintaining another stack, of indices at which
830 # the mark appears. For that matter, the latter stack would suffice,
831 # and we wouldn't need to push mark objects on self.stack at all.
832 # Doing so is probably a good thing, though, since if the pickle is
833 # corrupt (or hostile) we may get a clue from finding self.mark embedded
834 # in unpickled objects.
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000835 def marker(self):
836 stack = self.stack
837 mark = self.mark
838 k = len(stack)-1
839 while stack[k] is not mark: k = k-1
840 return k
841
842 dispatch = {}
843
Guido van Rossumd6c9e632003-01-28 03:49:52 +0000844 def load_proto(self):
845 proto = ord(self.read(1))
Guido van Rossumf4169812008-03-17 22:56:06 +0000846 if not 0 <= proto <= HIGHEST_PROTOCOL:
Guido van Rossum26d95c32007-08-27 23:18:54 +0000847 raise ValueError("unsupported pickle protocol: %d" % proto)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000848 dispatch[PROTO[0]] = load_proto
Guido van Rossumd6c9e632003-01-28 03:49:52 +0000849
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000850 def load_persid(self):
851 pid = self.readline()[:-1]
852 self.append(self.persistent_load(pid))
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000853 dispatch[PERSID[0]] = load_persid
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000854
855 def load_binpersid(self):
Raymond Hettinger46ac8eb2002-06-30 03:39:14 +0000856 pid = self.stack.pop()
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000857 self.append(self.persistent_load(pid))
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000858 dispatch[BINPERSID[0]] = load_binpersid
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000859
860 def load_none(self):
861 self.append(None)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000862 dispatch[NONE[0]] = load_none
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000863
Guido van Rossum7d97d312003-01-28 04:25:27 +0000864 def load_false(self):
865 self.append(False)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000866 dispatch[NEWFALSE[0]] = load_false
Guido van Rossum7d97d312003-01-28 04:25:27 +0000867
868 def load_true(self):
869 self.append(True)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000870 dispatch[NEWTRUE[0]] = load_true
Guido van Rossum7d97d312003-01-28 04:25:27 +0000871
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000872 def load_int(self):
Tim Peters19ef62d2001-08-28 22:21:18 +0000873 data = self.readline()
Guido van Rossume2763392002-04-05 19:30:08 +0000874 if data == FALSE[1:]:
875 val = False
876 elif data == TRUE[1:]:
877 val = True
878 else:
879 try:
880 val = int(data)
881 except ValueError:
Guido van Rossume2a383d2007-01-15 16:59:06 +0000882 val = int(data)
Guido van Rossume2763392002-04-05 19:30:08 +0000883 self.append(val)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000884 dispatch[INT[0]] = load_int
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000885
886 def load_binint(self):
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000887 self.append(mloads(b'i' + self.read(4)))
888 dispatch[BININT[0]] = load_binint
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000889
890 def load_binint1(self):
Tim Petersbbf63cd2003-01-27 21:15:36 +0000891 self.append(ord(self.read(1)))
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000892 dispatch[BININT1[0]] = load_binint1
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000893
894 def load_binint2(self):
Guido van Rossumcfe5f202007-05-08 21:26:54 +0000895 self.append(mloads(b'i' + self.read(2) + b'\000\000'))
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000896 dispatch[BININT2[0]] = load_binint2
Tim Peters2344fae2001-01-15 00:50:52 +0000897
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000898 def load_long(self):
Guido van Rossumfeea0782007-10-10 18:00:50 +0000899 val = self.readline()[:-1].decode("ascii")
900 self.append(int(val, 0))
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000901 dispatch[LONG[0]] = load_long
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000902
Guido van Rossumd6c9e632003-01-28 03:49:52 +0000903 def load_long1(self):
904 n = ord(self.read(1))
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000905 data = self.read(n)
906 self.append(decode_long(data))
907 dispatch[LONG1[0]] = load_long1
Guido van Rossumd6c9e632003-01-28 03:49:52 +0000908
909 def load_long4(self):
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000910 n = mloads(b'i' + self.read(4))
911 data = self.read(n)
912 self.append(decode_long(data))
913 dispatch[LONG4[0]] = load_long4
Guido van Rossumd6c9e632003-01-28 03:49:52 +0000914
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000915 def load_float(self):
Guido van Rossumff871742000-12-13 18:11:56 +0000916 self.append(float(self.readline()[:-1]))
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000917 dispatch[FLOAT[0]] = load_float
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000918
Guido van Rossumd3703791998-10-22 20:15:36 +0000919 def load_binfloat(self, unpack=struct.unpack):
920 self.append(unpack('>d', self.read(8))[0])
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000921 dispatch[BINFLOAT[0]] = load_binfloat
Guido van Rossumd3703791998-10-22 20:15:36 +0000922
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000923 def load_string(self):
Guido van Rossum98297ee2007-11-06 21:34:58 +0000924 orig = self.readline()
925 rep = orig[:-1]
Guido van Rossum26d95c32007-08-27 23:18:54 +0000926 for q in (b'"', b"'"): # double or single quote
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000927 if rep.startswith(q):
928 if not rep.endswith(q):
Guido van Rossum26d95c32007-08-27 23:18:54 +0000929 raise ValueError("insecure string pickle")
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000930 rep = rep[len(q):-len(q)]
931 break
932 else:
Guido van Rossum98297ee2007-11-06 21:34:58 +0000933 raise ValueError("insecure string pickle: %r" % orig)
934 self.append(codecs.escape_decode(rep)[0])
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000935 dispatch[STRING[0]] = load_string
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000936
937 def load_binstring(self):
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000938 len = mloads(b'i' + self.read(4))
Guido van Rossumf4169812008-03-17 22:56:06 +0000939 data = self.read(len)
940 value = str(data, self.encoding, self.errors)
941 self.append(value)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000942 dispatch[BINSTRING[0]] = load_binstring
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000943
Guido van Rossumf4169812008-03-17 22:56:06 +0000944 def load_binbytes(self):
945 len = mloads(b'i' + self.read(4))
946 self.append(self.read(len))
947 dispatch[BINBYTES[0]] = load_binbytes
948
Guido van Rossumb5f2f1b2000-03-10 23:20:09 +0000949 def load_unicode(self):
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000950 self.append(str(self.readline()[:-1], 'raw-unicode-escape'))
951 dispatch[UNICODE[0]] = load_unicode
Guido van Rossumb5f2f1b2000-03-10 23:20:09 +0000952
953 def load_binunicode(self):
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000954 len = mloads(b'i' + self.read(4))
955 self.append(str(self.read(len), 'utf-8'))
956 dispatch[BINUNICODE[0]] = load_binunicode
Guido van Rossumb5f2f1b2000-03-10 23:20:09 +0000957
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000958 def load_short_binstring(self):
Tim Petersbbf63cd2003-01-27 21:15:36 +0000959 len = ord(self.read(1))
Guido van Rossumf4169812008-03-17 22:56:06 +0000960 data = bytes(self.read(len))
961 value = str(data, self.encoding, self.errors)
962 self.append(value)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000963 dispatch[SHORT_BINSTRING[0]] = load_short_binstring
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000964
Guido van Rossumf4169812008-03-17 22:56:06 +0000965 def load_short_binbytes(self):
966 len = ord(self.read(1))
967 self.append(bytes(self.read(len)))
968 dispatch[SHORT_BINBYTES[0]] = load_short_binbytes
969
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000970 def load_tuple(self):
971 k = self.marker()
972 self.stack[k:] = [tuple(self.stack[k+1:])]
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000973 dispatch[TUPLE[0]] = load_tuple
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000974
975 def load_empty_tuple(self):
976 self.stack.append(())
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000977 dispatch[EMPTY_TUPLE[0]] = load_empty_tuple
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000978
Guido van Rossum44f0ea52003-01-28 04:14:51 +0000979 def load_tuple1(self):
980 self.stack[-1] = (self.stack[-1],)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000981 dispatch[TUPLE1[0]] = load_tuple1
Guido van Rossum44f0ea52003-01-28 04:14:51 +0000982
983 def load_tuple2(self):
984 self.stack[-2:] = [(self.stack[-2], self.stack[-1])]
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000985 dispatch[TUPLE2[0]] = load_tuple2
Guido van Rossum44f0ea52003-01-28 04:14:51 +0000986
987 def load_tuple3(self):
988 self.stack[-3:] = [(self.stack[-3], self.stack[-2], self.stack[-1])]
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000989 dispatch[TUPLE3[0]] = load_tuple3
Guido van Rossum44f0ea52003-01-28 04:14:51 +0000990
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000991 def load_empty_list(self):
992 self.stack.append([])
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000993 dispatch[EMPTY_LIST[0]] = load_empty_list
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000994
995 def load_empty_dictionary(self):
996 self.stack.append({})
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000997 dispatch[EMPTY_DICT[0]] = load_empty_dictionary
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000998
999 def load_list(self):
1000 k = self.marker()
1001 self.stack[k:] = [self.stack[k+1:]]
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001002 dispatch[LIST[0]] = load_list
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001003
1004 def load_dict(self):
1005 k = self.marker()
1006 d = {}
1007 items = self.stack[k+1:]
1008 for i in range(0, len(items), 2):
1009 key = items[i]
1010 value = items[i+1]
1011 d[key] = value
1012 self.stack[k:] = [d]
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001013 dispatch[DICT[0]] = load_dict
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001014
Tim Petersd01c1e92003-01-30 15:41:46 +00001015 # INST and OBJ differ only in how they get a class object. It's not
1016 # only sensible to do the rest in a common routine, the two routines
1017 # previously diverged and grew different bugs.
1018 # klass is the class to instantiate, and k points to the topmost mark
1019 # object, following which are the arguments for klass.__init__.
1020 def _instantiate(self, klass, k):
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001021 args = tuple(self.stack[k+1:])
1022 del self.stack[k:]
Guido van Rossum45e2fbc1998-03-26 21:13:24 +00001023 instantiated = 0
Tim Petersd01c1e92003-01-30 15:41:46 +00001024 if (not args and
Guido van Rossum13257902007-06-07 23:15:56 +00001025 isinstance(klass, type) and
Tim Petersd01c1e92003-01-30 15:41:46 +00001026 not hasattr(klass, "__getinitargs__")):
Guido van Rossuma8add0e2007-05-14 22:03:55 +00001027 value = _EmptyClass()
1028 value.__class__ = klass
1029 instantiated = 1
Guido van Rossum45e2fbc1998-03-26 21:13:24 +00001030 if not instantiated:
Guido van Rossum743d17e1998-09-15 20:25:57 +00001031 try:
Guido van Rossumb26a97a2003-01-28 22:29:13 +00001032 value = klass(*args)
Guido van Rossumb940e112007-01-10 16:19:56 +00001033 except TypeError as err:
Guido van Rossum26d95c32007-08-27 23:18:54 +00001034 raise TypeError("in constructor for %s: %s" %
1035 (klass.__name__, str(err)), sys.exc_info()[2])
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001036 self.append(value)
Tim Petersd01c1e92003-01-30 15:41:46 +00001037
1038 def load_inst(self):
1039 module = self.readline()[:-1]
1040 name = self.readline()[:-1]
1041 klass = self.find_class(module, name)
1042 self._instantiate(klass, self.marker())
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001043 dispatch[INST[0]] = load_inst
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001044
1045 def load_obj(self):
Tim Petersd01c1e92003-01-30 15:41:46 +00001046 # Stack is ... markobject classobject arg1 arg2 ...
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001047 k = self.marker()
Tim Petersd01c1e92003-01-30 15:41:46 +00001048 klass = self.stack.pop(k+1)
1049 self._instantiate(klass, k)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001050 dispatch[OBJ[0]] = load_obj
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001051
Guido van Rossum3a41c612003-01-28 15:10:22 +00001052 def load_newobj(self):
1053 args = self.stack.pop()
1054 cls = self.stack[-1]
1055 obj = cls.__new__(cls, *args)
Guido van Rossum533dbcf2003-01-28 17:55:05 +00001056 self.stack[-1] = obj
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001057 dispatch[NEWOBJ[0]] = load_newobj
Guido van Rossum3a41c612003-01-28 15:10:22 +00001058
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001059 def load_global(self):
1060 module = self.readline()[:-1]
1061 name = self.readline()[:-1]
1062 klass = self.find_class(module, name)
1063 self.append(klass)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001064 dispatch[GLOBAL[0]] = load_global
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001065
Guido van Rossum255f3ee2003-01-29 06:14:11 +00001066 def load_ext1(self):
1067 code = ord(self.read(1))
1068 self.get_extension(code)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001069 dispatch[EXT1[0]] = load_ext1
Guido van Rossum255f3ee2003-01-29 06:14:11 +00001070
1071 def load_ext2(self):
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001072 code = mloads(b'i' + self.read(2) + b'\000\000')
Guido van Rossum255f3ee2003-01-29 06:14:11 +00001073 self.get_extension(code)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001074 dispatch[EXT2[0]] = load_ext2
Guido van Rossum255f3ee2003-01-29 06:14:11 +00001075
1076 def load_ext4(self):
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001077 code = mloads(b'i' + self.read(4))
Guido van Rossum255f3ee2003-01-29 06:14:11 +00001078 self.get_extension(code)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001079 dispatch[EXT4[0]] = load_ext4
Guido van Rossum255f3ee2003-01-29 06:14:11 +00001080
1081 def get_extension(self, code):
1082 nil = []
Guido van Rossumd4b920c2003-02-04 01:54:49 +00001083 obj = _extension_cache.get(code, nil)
Guido van Rossum255f3ee2003-01-29 06:14:11 +00001084 if obj is not nil:
1085 self.append(obj)
1086 return
Guido van Rossumd4b920c2003-02-04 01:54:49 +00001087 key = _inverted_registry.get(code)
Guido van Rossum255f3ee2003-01-29 06:14:11 +00001088 if not key:
1089 raise ValueError("unregistered extension code %d" % code)
1090 obj = self.find_class(*key)
Guido van Rossumd4b920c2003-02-04 01:54:49 +00001091 _extension_cache[code] = obj
Guido van Rossum255f3ee2003-01-29 06:14:11 +00001092 self.append(obj)
1093
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001094 def find_class(self, module, name):
Guido van Rossumb26a97a2003-01-28 22:29:13 +00001095 # Subclasses may override this
Guido van Rossum98297ee2007-11-06 21:34:58 +00001096 if isinstance(module, bytes_types):
Guido van Rossumfeea0782007-10-10 18:00:50 +00001097 module = module.decode("utf-8")
Guido van Rossum98297ee2007-11-06 21:34:58 +00001098 if isinstance(name, bytes_types):
Guido van Rossumfeea0782007-10-10 18:00:50 +00001099 name = name.decode("utf-8")
Barry Warsawbf4d9592001-11-15 23:42:58 +00001100 __import__(module)
1101 mod = sys.modules[module]
1102 klass = getattr(mod, name)
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001103 return klass
1104
1105 def load_reduce(self):
1106 stack = self.stack
Guido van Rossumb26a97a2003-01-28 22:29:13 +00001107 args = stack.pop()
1108 func = stack[-1]
Guido van Rossum99603b02007-07-20 00:22:32 +00001109 try:
1110 value = func(*args)
1111 except:
1112 print(sys.exc_info())
1113 print(func, args)
1114 raise
Guido van Rossumb26a97a2003-01-28 22:29:13 +00001115 stack[-1] = value
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001116 dispatch[REDUCE[0]] = load_reduce
Guido van Rossuma48061a1995-01-10 00:31:14 +00001117
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001118 def load_pop(self):
1119 del self.stack[-1]
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001120 dispatch[POP[0]] = load_pop
Guido van Rossum7b5430f1995-03-04 22:25:21 +00001121
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001122 def load_pop_mark(self):
1123 k = self.marker()
Guido van Rossum45e2fbc1998-03-26 21:13:24 +00001124 del self.stack[k:]
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001125 dispatch[POP_MARK[0]] = load_pop_mark
Guido van Rossuma48061a1995-01-10 00:31:14 +00001126
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001127 def load_dup(self):
Guido van Rossumb1062fc1998-03-31 17:00:46 +00001128 self.append(self.stack[-1])
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001129 dispatch[DUP[0]] = load_dup
Guido van Rossuma48061a1995-01-10 00:31:14 +00001130
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001131 def load_get(self):
Guido van Rossum98297ee2007-11-06 21:34:58 +00001132 self.append(self.memo[self.readline()[:-1].decode("ascii")])
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001133 dispatch[GET[0]] = load_get
Guido van Rossum78536471996-04-12 13:36:27 +00001134
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001135 def load_binget(self):
Tim Petersbbf63cd2003-01-27 21:15:36 +00001136 i = ord(self.read(1))
Walter Dörwald70a6b492004-02-12 17:35:32 +00001137 self.append(self.memo[repr(i)])
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001138 dispatch[BINGET[0]] = load_binget
Guido van Rossum78536471996-04-12 13:36:27 +00001139
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001140 def load_long_binget(self):
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001141 i = mloads(b'i' + self.read(4))
Walter Dörwald70a6b492004-02-12 17:35:32 +00001142 self.append(self.memo[repr(i)])
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001143 dispatch[LONG_BINGET[0]] = load_long_binget
Guido van Rossum78536471996-04-12 13:36:27 +00001144
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001145 def load_put(self):
Guido van Rossum98297ee2007-11-06 21:34:58 +00001146 self.memo[self.readline()[:-1].decode("ascii")] = self.stack[-1]
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001147 dispatch[PUT[0]] = load_put
Guido van Rossuma48061a1995-01-10 00:31:14 +00001148
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001149 def load_binput(self):
Tim Petersbbf63cd2003-01-27 21:15:36 +00001150 i = ord(self.read(1))
Walter Dörwald70a6b492004-02-12 17:35:32 +00001151 self.memo[repr(i)] = self.stack[-1]
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001152 dispatch[BINPUT[0]] = load_binput
Guido van Rossuma48061a1995-01-10 00:31:14 +00001153
def load_long_binput(self):
    """Store the top of the stack in the memo under a four-byte index.

    The next four bytes are prefixed with ``b'i'`` and decoded by
    ``mloads`` (marshal) into an integer whose ``repr`` is the string
    memo key. The stack itself is not modified.
    """
    raw_index = self.read(4)
    index = mloads(b'i' + raw_index)
    top = self.stack[-1]
    self.memo[repr(index)] = top
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001157 dispatch[LONG_BINPUT[0]] = load_long_binput
Guido van Rossuma48061a1995-01-10 00:31:14 +00001158
def load_append(self):
    """Pop the top stack item and append it to the object beneath it.

    The object that ends up on top of the stack must provide an
    ``append`` method (normally a list being rebuilt).
    """
    item = self.stack.pop()
    container = self.stack[-1]
    container.append(item)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001164 dispatch[APPEND[0]] = load_append
Guido van Rossuma48061a1995-01-10 00:31:14 +00001165
def load_appends(self):
    """Extend the object below the mark with every item above the mark.

    ``self.marker()`` supplies the stack index of the mark. The object
    just below that index receives, via ``extend``, all items stacked
    above the mark; the mark and those items are then removed.
    """
    stack = self.stack
    mark_index = self.marker()
    container = stack[mark_index - 1]
    pending = stack[mark_index + 1:]
    container.extend(pending)
    # Drop the mark itself together with the items just transferred.
    del stack[mark_index:]
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001172 dispatch[APPENDS[0]] = load_appends
Tim Peters2344fae2001-01-15 00:50:52 +00001173
def load_setitem(self):
    """Pop a value and then a key, and store the pair in the object below.

    The value is on top of the stack with its key directly underneath;
    after both are popped, the new stack top is used as the mapping.
    """
    stack = self.stack
    # Tuple elements are evaluated left to right: value first, then key.
    value, key = stack.pop(), stack.pop()
    mapping = stack[-1]
    mapping[key] = value
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001180 dispatch[SETITEM[0]] = load_setitem
Guido van Rossum0c891ce1995-03-14 15:09:05 +00001181
def load_setitems(self):
    """Store every key/value pair above the mark into the object below it.

    ``self.marker()`` supplies the stack index of the mark. Items above
    it are consumed as alternating key, value entries and assigned into
    the object just below the mark; the mark and the items are then
    removed from the stack.
    """
    stack = self.stack
    mark_index = self.marker()
    mapping = stack[mark_index - 1]
    # Step through the pairs by index so that a dangling key (odd number
    # of items) still fails with IndexError when its value is fetched.
    for key_pos in range(mark_index + 1, len(stack), 2):
        value = stack[key_pos + 1]
        mapping[stack[key_pos]] = value

    del stack[mark_index:]
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001190 dispatch[SETITEMS[0]] = load_setitems
Guido van Rossuma48061a1995-01-10 00:31:14 +00001191
def load_build(self):
    """Pop a state object and apply it to the instance beneath it.

    If the instance exposes a truthy ``__setstate__`` attribute, the
    state is passed to it and nothing else happens. Otherwise a 2-tuple
    state is split into ``(dict_state, slot_state)``; a truthy dict state
    is merged into ``instance.__dict__`` and each entry of a truthy slot
    state is assigned with ``setattr``.
    """
    state = self.stack.pop()
    instance = self.stack[-1]

    restore = getattr(instance, "__setstate__", None)
    if restore:
        restore(state)
    else:
        slot_values = None
        if isinstance(state, tuple) and len(state) == 2:
            state, slot_values = state
        if state:
            instance.__dict__.update(state)
        if slot_values:
            for attr_name, attr_value in slot_values.items():
                setattr(instance, attr_name, attr_value)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001208 dispatch[BUILD[0]] = load_build
Guido van Rossuma48061a1995-01-10 00:31:14 +00001209
def load_mark(self):
    """Push the ``self.mark`` sentinel object via ``self.append``."""
    sentinel = self.mark
    self.append(sentinel)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001212 dispatch[MARK[0]] = load_mark
Guido van Rossuma48061a1995-01-10 00:31:14 +00001213
def load_stop(self):
    """Pop the top of the stack and raise ``_Stop`` carrying that value."""
    result = self.stack.pop()
    raise _Stop(result)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001217 dispatch[STOP[0]] = load_stop
Guido van Rossuma48061a1995-01-10 00:31:14 +00001218
Guido van Rossume467be61997-12-05 19:42:42 +00001219# Helper class for load_inst/load_obj
1220
1221class _EmptyClass:
1222 pass
Guido van Rossuma48061a1995-01-10 00:31:14 +00001223
Tim Peters91149822003-01-31 03:43:58 +00001224# Encode/decode longs in linear time.
1225
1226import binascii as _binascii
Guido van Rossumd6c9e632003-01-28 03:49:52 +00001227
def encode_long(x):
    r"""Encode a long to a two's complement little-endian binary string.

    The result is the shortest byte string whose two's complement
    little-endian interpretation equals x.  Note that 0 is a special
    case, returning an empty string, to save a byte in the LONG1
    pickling context.

    >>> encode_long(0)
    b''
    >>> encode_long(255)
    b'\xff\x00'
    >>> encode_long(32767)
    b'\xff\x7f'
    >>> encode_long(-256)
    b'\x00\xff'
    >>> encode_long(-32768)
    b'\x00\x80'
    >>> encode_long(-128)
    b'\x80'
    >>> encode_long(127)
    b'\x7f'
    """

    if x == 0:
        return b''

    # hex() never produces a trailing 'L' in Python 3, so the digits are
    # simply everything after the "0x" prefix.
    if x > 0:
        digits = hex(x)[2:]
        if len(digits) & 1:
            # need an even # of nibbles for unhexlify
            digits = "0" + digits
        elif int(digits[0], 16) >= 8:
            # "looks negative", so need a byte of sign bits
            digits = "00" + digits
    else:
        # Build the 256's-complement: (1 << nbits) + x, where nbits is the
        # width of -x rounded up to a whole number of bytes.
        nibbles = len(hex(-x)) - 2
        if nibbles & 1:
            # Extend to a full byte.
            nibbles += 1
        complement = x + (1 << (nibbles * 4))
        assert complement > 0
        # Restore leading zero nibbles that hex() drops.
        digits = hex(complement)[2:].rjust(nibbles, "0")
        if int(digits[0], 16) < 8:
            # "looks positive", so need a byte of sign bits
            digits = "ff" + digits

    assert len(digits) & 1 == 0, (x, digits)
    binary = _binascii.unhexlify(digits)
    # unhexlify yields big-endian bytes; reverse for little-endian output.
    return bytes(binary[::-1])
Guido van Rossumd6c9e632003-01-28 03:49:52 +00001293
def decode_long(data):
    r"""Decode a long from a two's complement little-endian binary string.

    An empty string decodes to 0.  Otherwise the bytes are read as an
    unsigned little-endian number and, when the high bit of the last
    (most significant) byte is set, 2**(8*len(data)) is subtracted to
    obtain the negative value.

    >>> decode_long(b'')
    0
    >>> decode_long(b"\xff\x00")
    255
    >>> decode_long(b"\xff\x7f")
    32767
    >>> decode_long(b"\x00\xff")
    -256
    >>> decode_long(b"\x00\x80")
    -32768
    >>> decode_long(b"\x80")
    -128
    >>> decode_long(b"\x7f")
    127
    """

    size = len(data)
    if not size:
        return 0
    # Reverse to big-endian so the hex digits read most-significant first.
    big_endian_hex = _binascii.hexlify(data[::-1])
    value = int(big_endian_hex, 16)
    if data[-1] < 0x80:
        return value
    # Sign bit set: shift the unsigned reading down into the negatives.
    return value - (1 << (size * 8))
Guido van Rossumd6c9e632003-01-28 03:49:52 +00001321
Guido van Rossum0c891ce1995-03-14 15:09:05 +00001322# Shorthands
1323
def dump(obj, file, protocol=None):
    """Pickle obj to file using a new Pickler built with protocol."""
    pickler = Pickler(file, protocol)
    pickler.dump(obj)
Guido van Rossum0c891ce1995-03-14 15:09:05 +00001326
def dumps(obj, protocol=None):
    """Return the pickled form of obj, produced via an in-memory buffer."""
    buffer = io.BytesIO()
    Pickler(buffer, protocol).dump(obj)
    pickled = buffer.getvalue()
    # Sanity check that the buffer handed back one of the bytes types.
    assert isinstance(pickled, bytes_types)
    return pickled
Guido van Rossum0c891ce1995-03-14 15:09:05 +00001333
def load(file):
    """Return the object read from file by a new Unpickler."""
    unpickler = Unpickler(file)
    return unpickler.load()
Guido van Rossum0c891ce1995-03-14 15:09:05 +00001336
def loads(s):
    """Return the object unpickled from the bytes-like s.

    Raises TypeError when s is a str: the data is wrapped in
    io.BytesIO, which needs bytes rather than text.
    """
    if not isinstance(s, str):
        return Unpickler(io.BytesIO(s)).load()
    raise TypeError("Can't load pickle from unicode string")
Guido van Rossumd6c9e632003-01-28 03:49:52 +00001342
1343# Doctest
1344
1345def _test():
1346 import doctest
1347 return doctest.testmod()
1348
1349if __name__ == "__main__":
1350 _test()