bpo-42103: Improve validation of Plist files. (GH-22882)


* Prevent some possible DoS attacks via providing invalid Plist files
  with extremely large number of objects or collection sizes.
* Raise InvalidFileException for too large bytes and string size instead of returning garbage.
* Raise InvalidFileException instead of ValueError for specific invalid datetime (NaN).
* Raise InvalidFileException instead of TypeError for non-hashable dict keys.
* Add more tests for invalid Plist files.
(cherry picked from commit 34637a0ce21e7261b952fbd9d006474cc29b681f)

Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
diff --git a/Lib/plistlib.py b/Lib/plistlib.py
index a740351..83b214e 100644
--- a/Lib/plistlib.py
+++ b/Lib/plistlib.py
@@ -477,7 +477,7 @@
             return self._read_object(top_object)
 
         except (OSError, IndexError, struct.error, OverflowError,
-                UnicodeDecodeError):
+                ValueError):
             raise InvalidFileException()
 
     def _get_size(self, tokenL):
@@ -493,7 +493,7 @@
     def _read_ints(self, n, size):
         data = self._fp.read(size * n)
         if size in _BINARY_FORMAT:
-            return struct.unpack('>' + _BINARY_FORMAT[size] * n, data)
+            return struct.unpack(f'>{n}{_BINARY_FORMAT[size]}', data)
         else:
             if not size or len(data) != size * n:
                 raise InvalidFileException()
@@ -553,14 +553,22 @@
         elif tokenH == 0x40:  # data
             s = self._get_size(tokenL)
             result = self._fp.read(s)
+            if len(result) != s:
+                raise InvalidFileException()
 
         elif tokenH == 0x50:  # ascii string
             s = self._get_size(tokenL)
-            result =  self._fp.read(s).decode('ascii')
+            data = self._fp.read(s)
+            if len(data) != s:
+                raise InvalidFileException()
+            result = data.decode('ascii')
 
         elif tokenH == 0x60:  # unicode string
-            s = self._get_size(tokenL)
-            result = self._fp.read(s * 2).decode('utf-16be')
+            s = self._get_size(tokenL) * 2
+            data = self._fp.read(s)
+            if len(data) != s:
+                raise InvalidFileException()
+            result = data.decode('utf-16be')
 
         elif tokenH == 0x80:  # UID
             # used by Key-Archiver plist files
@@ -585,9 +593,11 @@
             obj_refs = self._read_refs(s)
             result = self._dict_type()
             self._objects[ref] = result
-            for k, o in zip(key_refs, obj_refs):
-                result[self._read_object(k)] = self._read_object(o)
-
+            try:
+                for k, o in zip(key_refs, obj_refs):
+                    result[self._read_object(k)] = self._read_object(o)
+            except TypeError:
+                raise InvalidFileException()
         else:
             raise InvalidFileException()