bpo-42103: Improve validation of Plist files. (GH-22882)
* Prevent some possible DoS attacks caused by invalid Plist files
with an extremely large number of objects or extremely large collection sizes.
* Raise InvalidFileException for a bytes or string size that is too large instead of returning garbage.
* Raise InvalidFileException instead of ValueError for a specific invalid datetime (NaN).
* Raise InvalidFileException instead of TypeError for non-hashable dict keys.
* Add more tests for invalid Plist files.
(cherry picked from commit 34637a0ce21e7261b952fbd9d006474cc29b681f)
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
diff --git a/Lib/plistlib.py b/Lib/plistlib.py
index a740351..83b214e 100644
--- a/Lib/plistlib.py
+++ b/Lib/plistlib.py
@@ -477,7 +477,7 @@
return self._read_object(top_object)
except (OSError, IndexError, struct.error, OverflowError,
- UnicodeDecodeError):
+ ValueError):
raise InvalidFileException()
def _get_size(self, tokenL):
@@ -493,7 +493,7 @@
def _read_ints(self, n, size):
data = self._fp.read(size * n)
if size in _BINARY_FORMAT:
- return struct.unpack('>' + _BINARY_FORMAT[size] * n, data)
+ return struct.unpack(f'>{n}{_BINARY_FORMAT[size]}', data)
else:
if not size or len(data) != size * n:
raise InvalidFileException()
@@ -553,14 +553,22 @@
elif tokenH == 0x40: # data
s = self._get_size(tokenL)
result = self._fp.read(s)
+ if len(result) != s:
+ raise InvalidFileException()
elif tokenH == 0x50: # ascii string
s = self._get_size(tokenL)
- result = self._fp.read(s).decode('ascii')
+ data = self._fp.read(s)
+ if len(data) != s:
+ raise InvalidFileException()
+ result = data.decode('ascii')
elif tokenH == 0x60: # unicode string
- s = self._get_size(tokenL)
- result = self._fp.read(s * 2).decode('utf-16be')
+ s = self._get_size(tokenL) * 2
+ data = self._fp.read(s)
+ if len(data) != s:
+ raise InvalidFileException()
+ result = data.decode('utf-16be')
elif tokenH == 0x80: # UID
# used by Key-Archiver plist files
@@ -585,9 +593,11 @@
obj_refs = self._read_refs(s)
result = self._dict_type()
self._objects[ref] = result
- for k, o in zip(key_refs, obj_refs):
- result[self._read_object(k)] = self._read_object(o)
-
+ try:
+ for k, o in zip(key_refs, obj_refs):
+ result[self._read_object(k)] = self._read_object(o)
+ except TypeError:
+ raise InvalidFileException()
else:
raise InvalidFileException()