Issue #25761: Improved detection of errors in broken pickle data.
diff --git a/Lib/pickle.py b/Lib/pickle.py
index 53978fb..a60b1b7 100644
--- a/Lib/pickle.py
+++ b/Lib/pickle.py
@@ -1031,7 +1031,7 @@
self._unframer = _Unframer(self._file_read, self._file_readline)
self.read = self._unframer.read
self.readline = self._unframer.readline
- self.mark = object() # any new unique object
+ self.metastack = []
self.stack = []
self.append = self.stack.append
self.proto = 0
@@ -1047,20 +1047,12 @@
except _Stop as stopinst:
return stopinst.value
- # Return largest index k such that self.stack[k] is self.mark.
- # If the stack doesn't contain a mark, eventually raises IndexError.
- # This could be sped by maintaining another stack, of indices at which
- # the mark appears. For that matter, the latter stack would suffice,
- # and we wouldn't need to push mark objects on self.stack at all.
- # Doing so is probably a good thing, though, since if the pickle is
- # corrupt (or hostile) we may get a clue from finding self.mark embedded
- # in unpickled objects.
- def marker(self):
- stack = self.stack
- mark = self.mark
- k = len(stack)-1
- while stack[k] is not mark: k = k-1
- return k
+ # Return a list of items pushed onto the stack after the last MARK instruction.
+ def pop_mark(self):
+ items = self.stack
+ self.stack = self.metastack.pop()
+ self.append = self.stack.append
+ return items
def persistent_load(self, pid):
raise UnpicklingError("unsupported persistent id encountered")
@@ -1237,8 +1229,8 @@
dispatch[SHORT_BINUNICODE[0]] = load_short_binunicode
def load_tuple(self):
- k = self.marker()
- self.stack[k:] = [tuple(self.stack[k+1:])]
+ items = self.pop_mark()
+ self.append(tuple(items))
dispatch[TUPLE[0]] = load_tuple
def load_empty_tuple(self):
@@ -1270,21 +1262,20 @@
dispatch[EMPTY_SET[0]] = load_empty_set
def load_frozenset(self):
- k = self.marker()
- self.stack[k:] = [frozenset(self.stack[k+1:])]
+ items = self.pop_mark()
+ self.append(frozenset(items))
dispatch[FROZENSET[0]] = load_frozenset
def load_list(self):
- k = self.marker()
- self.stack[k:] = [self.stack[k+1:]]
+ items = self.pop_mark()
+ self.append(items)
dispatch[LIST[0]] = load_list
def load_dict(self):
- k = self.marker()
- items = self.stack[k+1:]
+ items = self.pop_mark()
d = {items[i]: items[i+1]
for i in range(0, len(items), 2)}
- self.stack[k:] = [d]
+ self.append(d)
dispatch[DICT[0]] = load_dict
# INST and OBJ differ only in how they get a class object. It's not
@@ -1292,9 +1283,7 @@
# previously diverged and grew different bugs.
# klass is the class to instantiate, and k points to the topmost mark
# object, following which are the arguments for klass.__init__.
- def _instantiate(self, klass, k):
- args = tuple(self.stack[k+1:])
- del self.stack[k:]
+ def _instantiate(self, klass, args):
if (args or not isinstance(klass, type) or
hasattr(klass, "__getinitargs__")):
try:
@@ -1310,14 +1299,14 @@
module = self.readline()[:-1].decode("ascii")
name = self.readline()[:-1].decode("ascii")
klass = self.find_class(module, name)
- self._instantiate(klass, self.marker())
+ self._instantiate(klass, self.pop_mark())
dispatch[INST[0]] = load_inst
def load_obj(self):
# Stack is ... markobject classobject arg1 arg2 ...
- k = self.marker()
- klass = self.stack.pop(k+1)
- self._instantiate(klass, k)
+ args = self.pop_mark()
+ cls = args.pop(0)
+ self._instantiate(cls, args)
dispatch[OBJ[0]] = load_obj
def load_newobj(self):
@@ -1402,12 +1391,14 @@
dispatch[REDUCE[0]] = load_reduce
def load_pop(self):
- del self.stack[-1]
+ if self.stack:
+ del self.stack[-1]
+ else:
+ self.pop_mark()
dispatch[POP[0]] = load_pop
def load_pop_mark(self):
- k = self.marker()
- del self.stack[k:]
+ self.pop_mark()
dispatch[POP_MARK[0]] = load_pop_mark
def load_dup(self):
@@ -1463,17 +1454,14 @@
dispatch[APPEND[0]] = load_append
def load_appends(self):
- stack = self.stack
- mark = self.marker()
- list_obj = stack[mark - 1]
- items = stack[mark + 1:]
+ items = self.pop_mark()
+ list_obj = self.stack[-1]
if isinstance(list_obj, list):
list_obj.extend(items)
else:
append = list_obj.append
for item in items:
append(item)
- del stack[mark:]
dispatch[APPENDS[0]] = load_appends
def load_setitem(self):
@@ -1485,27 +1473,21 @@
dispatch[SETITEM[0]] = load_setitem
def load_setitems(self):
- stack = self.stack
- mark = self.marker()
- dict = stack[mark - 1]
- for i in range(mark + 1, len(stack), 2):
- dict[stack[i]] = stack[i + 1]
-
- del stack[mark:]
+ items = self.pop_mark()
+ dict = self.stack[-1]
+ for i in range(0, len(items), 2):
+ dict[items[i]] = items[i + 1]
dispatch[SETITEMS[0]] = load_setitems
def load_additems(self):
- stack = self.stack
- mark = self.marker()
- set_obj = stack[mark - 1]
- items = stack[mark + 1:]
+ items = self.pop_mark()
+ set_obj = self.stack[-1]
if isinstance(set_obj, set):
set_obj.update(items)
else:
add = set_obj.add
for item in items:
add(item)
- del stack[mark:]
dispatch[ADDITEMS[0]] = load_additems
def load_build(self):
@@ -1533,7 +1515,9 @@
dispatch[BUILD[0]] = load_build
def load_mark(self):
- self.append(self.mark)
+ self.metastack.append(self.stack)
+ self.stack = []
+ self.append = self.stack.append
dispatch[MARK[0]] = load_mark
def load_stop(self):
diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py
index 608c35a..217aa3d 100644
--- a/Lib/test/pickletester.py
+++ b/Lib/test/pickletester.py
@@ -1000,7 +1000,7 @@
b'0', # POP
b'1', # POP_MARK
b'2', # DUP
- # b'(2', # PyUnpickler doesn't raise
+ b'(2',
b'R', # REDUCE
b')R',
b'a', # APPEND
@@ -1009,7 +1009,7 @@
b'Nb',
b'd', # DICT
b'e', # APPENDS
- # b'(e', # PyUnpickler raises AttributeError
+ b'(e',
b'ibuiltins\nlist\n', # INST
b'l', # LIST
b'o', # OBJ
@@ -1022,7 +1022,7 @@
b'NNs',
b't', # TUPLE
b'u', # SETITEMS
- # b'(u', # PyUnpickler doesn't raise
+ b'(u',
b'}(Nu',
b'\x81', # NEWOBJ
b')\x81',
@@ -1033,7 +1033,7 @@
b'N\x87',
b'NN\x87',
b'\x90', # ADDITEMS
- # b'(\x90', # PyUnpickler raises AttributeError
+ b'(\x90',
b'\x91', # FROZENSET
b'\x92', # NEWOBJ_EX
b')}\x92',
@@ -1046,7 +1046,7 @@
def test_bad_mark(self):
badpickles = [
- # b'N(.', # STOP
+ b'N(.', # STOP
b'N(2', # DUP
b'cbuiltins\nlist\n)(R', # REDUCE
b'cbuiltins\nlist\n()R',
@@ -1081,7 +1081,7 @@
b'N(\x94', # MEMOIZE
]
for p in badpickles:
- self.check_unpickling_error(self.bad_mark_errors, p)
+ self.check_unpickling_error(self.bad_stack_errors, p)
def test_truncated_data(self):
self.check_unpickling_error(EOFError, b'')
@@ -2581,11 +2581,6 @@
self.assertRaises(pickle.PicklingError, BadPickler().dump, 0)
self.assertRaises(pickle.UnpicklingError, BadUnpickler().load)
- def test_bad_input(self):
- # Test issue4298
- s = bytes([0x58, 0, 0, 0, 0x54])
- self.assertRaises(EOFError, pickle.loads, s)
-
class AbstractPersistentPicklerTests(unittest.TestCase):
diff --git a/Lib/test/test_pickle.py b/Lib/test/test_pickle.py
index bd38cfb..6b97315 100644
--- a/Lib/test/test_pickle.py
+++ b/Lib/test/test_pickle.py
@@ -33,8 +33,6 @@
unpickler = pickle._Unpickler
bad_stack_errors = (IndexError,)
- bad_mark_errors = (IndexError, pickle.UnpicklingError,
- TypeError, AttributeError, EOFError)
truncated_errors = (pickle.UnpicklingError, EOFError,
AttributeError, ValueError,
struct.error, IndexError, ImportError)
@@ -69,8 +67,6 @@
pickler = pickle._Pickler
unpickler = pickle._Unpickler
bad_stack_errors = (pickle.UnpicklingError, IndexError)
- bad_mark_errors = (pickle.UnpicklingError, IndexError,
- TypeError, AttributeError, EOFError)
truncated_errors = (pickle.UnpicklingError, EOFError,
AttributeError, ValueError,
struct.error, IndexError, ImportError)
@@ -132,7 +128,6 @@
class CUnpicklerTests(PyUnpicklerTests):
unpickler = _pickle.Unpickler
bad_stack_errors = (pickle.UnpicklingError,)
- bad_mark_errors = (EOFError,)
truncated_errors = (pickle.UnpicklingError, EOFError,
AttributeError, ValueError)