Issue #25761: Improved detection of errors in broken pickle data.

commit: 59fb6342a4ffdc23e1269a418734f4fc0f984873 [log] [tgz]
author: Serhiy Storchaka <storchaka@gmail.com> Sun Dec 06 22:01:35 2015 +0200
committer: Serhiy Storchaka <storchaka@gmail.com> Sun Dec 06 22:01:35 2015 +0200
tree: 1be1e6d729c7b14769a44e80db57de2b58c2fa49
parent: c68e723e6f8bd9923d23a4f14b66504b192aba74 [diff]
diff --git a/Lib/pickle.py b/Lib/pickle.py
index 53978fb..a60b1b7 100644
--- a/Lib/pickle.py
+++ b/Lib/pickle.py

@@ -1031,7 +1031,7 @@
         self._unframer = _Unframer(self._file_read, self._file_readline)
         self.read = self._unframer.read
         self.readline = self._unframer.readline
-        self.mark = object() # any new unique object
+        self.metastack = []
         self.stack = []
         self.append = self.stack.append
         self.proto = 0
@@ -1047,20 +1047,12 @@
         except _Stop as stopinst:
             return stopinst.value
 
-    # Return largest index k such that self.stack[k] is self.mark.
-    # If the stack doesn't contain a mark, eventually raises IndexError.
-    # This could be sped by maintaining another stack, of indices at which
-    # the mark appears.  For that matter, the latter stack would suffice,
-    # and we wouldn't need to push mark objects on self.stack at all.
-    # Doing so is probably a good thing, though, since if the pickle is
-    # corrupt (or hostile) we may get a clue from finding self.mark embedded
-    # in unpickled objects.
-    def marker(self):
-        stack = self.stack
-        mark = self.mark
-        k = len(stack)-1
-        while stack[k] is not mark: k = k-1
-        return k
+    # Return a list of items pushed onto the stack after the last MARK opcode.
+    def pop_mark(self):
+        items = self.stack
+        self.stack = self.metastack.pop()
+        self.append = self.stack.append
+        return items
 
     def persistent_load(self, pid):
         raise UnpicklingError("unsupported persistent id encountered")
@@ -1237,8 +1229,8 @@
     dispatch[SHORT_BINUNICODE[0]] = load_short_binunicode
 
     def load_tuple(self):
-        k = self.marker()
-        self.stack[k:] = [tuple(self.stack[k+1:])]
+        items = self.pop_mark()
+        self.append(tuple(items))
     dispatch[TUPLE[0]] = load_tuple
 
     def load_empty_tuple(self):
@@ -1270,21 +1262,20 @@
     dispatch[EMPTY_SET[0]] = load_empty_set
 
     def load_frozenset(self):
-        k = self.marker()
-        self.stack[k:] = [frozenset(self.stack[k+1:])]
+        items = self.pop_mark()
+        self.append(frozenset(items))
     dispatch[FROZENSET[0]] = load_frozenset
 
     def load_list(self):
-        k = self.marker()
-        self.stack[k:] = [self.stack[k+1:]]
+        items = self.pop_mark()
+        self.append(items)
     dispatch[LIST[0]] = load_list
 
     def load_dict(self):
-        k = self.marker()
-        items = self.stack[k+1:]
+        items = self.pop_mark()
         d = {items[i]: items[i+1]
              for i in range(0, len(items), 2)}
-        self.stack[k:] = [d]
+        self.append(d)
     dispatch[DICT[0]] = load_dict
 
     # INST and OBJ differ only in how they get a class object.  It's not
@@ -1292,9 +1283,7 @@
     # previously diverged and grew different bugs.
     # klass is the class to instantiate, and k points to the topmost mark
     # object, following which are the arguments for klass.__init__.
-    def _instantiate(self, klass, k):
-        args = tuple(self.stack[k+1:])
-        del self.stack[k:]
+    def _instantiate(self, klass, args):
         if (args or not isinstance(klass, type) or
             hasattr(klass, "__getinitargs__")):
             try:
@@ -1310,14 +1299,14 @@
         module = self.readline()[:-1].decode("ascii")
         name = self.readline()[:-1].decode("ascii")
         klass = self.find_class(module, name)
-        self._instantiate(klass, self.marker())
+        self._instantiate(klass, self.pop_mark())
     dispatch[INST[0]] = load_inst
 
     def load_obj(self):
         # Stack is ... markobject classobject arg1 arg2 ...
-        k = self.marker()
-        klass = self.stack.pop(k+1)
-        self._instantiate(klass, k)
+        args = self.pop_mark()
+        cls = args.pop(0)
+        self._instantiate(cls, args)
     dispatch[OBJ[0]] = load_obj
 
     def load_newobj(self):
@@ -1402,12 +1391,14 @@
     dispatch[REDUCE[0]] = load_reduce
 
     def load_pop(self):
-        del self.stack[-1]
+        if self.stack:
+            del self.stack[-1]
+        else:
+            self.pop_mark()
     dispatch[POP[0]] = load_pop
 
     def load_pop_mark(self):
-        k = self.marker()
-        del self.stack[k:]
+        self.pop_mark()
     dispatch[POP_MARK[0]] = load_pop_mark
 
     def load_dup(self):
@@ -1463,17 +1454,14 @@
     dispatch[APPEND[0]] = load_append
 
     def load_appends(self):
-        stack = self.stack
-        mark = self.marker()
-        list_obj = stack[mark - 1]
-        items = stack[mark + 1:]
+        items = self.pop_mark()
+        list_obj = self.stack[-1]
         if isinstance(list_obj, list):
             list_obj.extend(items)
         else:
             append = list_obj.append
             for item in items:
                 append(item)
-        del stack[mark:]
     dispatch[APPENDS[0]] = load_appends
 
     def load_setitem(self):
@@ -1485,27 +1473,21 @@
     dispatch[SETITEM[0]] = load_setitem
 
     def load_setitems(self):
-        stack = self.stack
-        mark = self.marker()
-        dict = stack[mark - 1]
-        for i in range(mark + 1, len(stack), 2):
-            dict[stack[i]] = stack[i + 1]
-
-        del stack[mark:]
+        items = self.pop_mark()
+        dict = self.stack[-1]
+        for i in range(0, len(items), 2):
+            dict[items[i]] = items[i + 1]
     dispatch[SETITEMS[0]] = load_setitems
 
     def load_additems(self):
-        stack = self.stack
-        mark = self.marker()
-        set_obj = stack[mark - 1]
-        items = stack[mark + 1:]
+        items = self.pop_mark()
+        set_obj = self.stack[-1]
         if isinstance(set_obj, set):
             set_obj.update(items)
         else:
             add = set_obj.add
             for item in items:
                 add(item)
-        del stack[mark:]
     dispatch[ADDITEMS[0]] = load_additems
 
     def load_build(self):
@@ -1533,7 +1515,9 @@
     dispatch[BUILD[0]] = load_build
 
     def load_mark(self):
-        self.append(self.mark)
+        self.metastack.append(self.stack)
+        self.stack = []
+        self.append = self.stack.append
     dispatch[MARK[0]] = load_mark
 
     def load_stop(self):

diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py
index 608c35a..217aa3d 100644
--- a/Lib/test/pickletester.py
+++ b/Lib/test/pickletester.py

@@ -1000,7 +1000,7 @@
             b'0',                       # POP
             b'1',                       # POP_MARK
             b'2',                       # DUP
-            # b'(2',                    # PyUnpickler doesn't raise
+            b'(2',
             b'R',                       # REDUCE
             b')R',
             b'a',                       # APPEND
@@ -1009,7 +1009,7 @@
             b'Nb',
             b'd',                       # DICT
             b'e',                       # APPENDS
-            # b'(e',                    # PyUnpickler raises AttributeError
+            b'(e',
             b'ibuiltins\nlist\n',       # INST
             b'l',                       # LIST
             b'o',                       # OBJ
@@ -1022,7 +1022,7 @@
             b'NNs',
             b't',                       # TUPLE
             b'u',                       # SETITEMS
-            # b'(u',                    # PyUnpickler doesn't raise
+            b'(u',
             b'}(Nu',
             b'\x81',                    # NEWOBJ
             b')\x81',
@@ -1033,7 +1033,7 @@
             b'N\x87',
             b'NN\x87',
             b'\x90',                    # ADDITEMS
-            # b'(\x90',                 # PyUnpickler raises AttributeError
+            b'(\x90',
             b'\x91',                    # FROZENSET
             b'\x92',                    # NEWOBJ_EX
             b')}\x92',
@@ -1046,7 +1046,7 @@
 
     def test_bad_mark(self):
         badpickles = [
-            # b'N(.',                     # STOP
+            b'N(.',                     # STOP
             b'N(2',                     # DUP
             b'cbuiltins\nlist\n)(R',    # REDUCE
             b'cbuiltins\nlist\n()R',
@@ -1081,7 +1081,7 @@
             b'N(\x94',                  # MEMOIZE
         ]
         for p in badpickles:
-            self.check_unpickling_error(self.bad_mark_errors, p)
+            self.check_unpickling_error(self.bad_stack_errors, p)
 
     def test_truncated_data(self):
         self.check_unpickling_error(EOFError, b'')
@@ -2581,11 +2581,6 @@
         self.assertRaises(pickle.PicklingError, BadPickler().dump, 0)
         self.assertRaises(pickle.UnpicklingError, BadUnpickler().load)
 
-    def test_bad_input(self):
-        # Test issue4298
-        s = bytes([0x58, 0, 0, 0, 0x54])
-        self.assertRaises(EOFError, pickle.loads, s)
-
 
 class AbstractPersistentPicklerTests(unittest.TestCase):
 

diff --git a/Lib/test/test_pickle.py b/Lib/test/test_pickle.py
index bd38cfb..6b97315 100644
--- a/Lib/test/test_pickle.py
+++ b/Lib/test/test_pickle.py

@@ -33,8 +33,6 @@
 
     unpickler = pickle._Unpickler
     bad_stack_errors = (IndexError,)
-    bad_mark_errors = (IndexError, pickle.UnpicklingError,
-                       TypeError, AttributeError, EOFError)
     truncated_errors = (pickle.UnpicklingError, EOFError,
                         AttributeError, ValueError,
                         struct.error, IndexError, ImportError)
@@ -69,8 +67,6 @@
     pickler = pickle._Pickler
     unpickler = pickle._Unpickler
     bad_stack_errors = (pickle.UnpicklingError, IndexError)
-    bad_mark_errors = (pickle.UnpicklingError, IndexError,
-                       TypeError, AttributeError, EOFError)
     truncated_errors = (pickle.UnpicklingError, EOFError,
                         AttributeError, ValueError,
                         struct.error, IndexError, ImportError)
@@ -132,7 +128,6 @@
     class CUnpicklerTests(PyUnpicklerTests):
         unpickler = _pickle.Unpickler
         bad_stack_errors = (pickle.UnpicklingError,)
-        bad_mark_errors = (EOFError,)
         truncated_errors = (pickle.UnpicklingError, EOFError,
                             AttributeError, ValueError)
commit	59fb6342a4ffdc23e1269a418734f4fc0f984873	[log] [tgz]
author	Serhiy Storchaka <storchaka@gmail.com>	Sun Dec 06 22:01:35 2015 +0200
committer	Serhiy Storchaka <storchaka@gmail.com>	Sun Dec 06 22:01:35 2015 +0200
tree	1be1e6d729c7b14769a44e80db57de2b58c2fa49
parent	c68e723e6f8bd9923d23a4f14b66504b192aba74 [diff]