bpo-27397: Make email module properly handle invalid-length base64 strings (GH-7583) (GH-7664) When attempting to base64-decode a payload of invalid length (1 mod 4), properly recognize and handle it. The given data will be returned as-is, i.e. not decoded, along with a new defect, InvalidBase64LengthDefect. (cherry picked from commit c3f55be7dd012b7e92901627d0b31c21e983ccb4) Co-authored-by: Tal Einat <taleinat+github@gmail.com>

commit: 7b82281c80d0064559866afe92f19cae5978c841 [log] [tgz]
author: Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com> Tue Jun 12 06:46:11 2018 -0700
committer: Tal Einat <taleinat+github@gmail.com> Tue Jun 12 16:46:11 2018 +0300
tree: 4f78f92d0667944be27d755cfb6a3272774575f8
parent: 66cea5cb1be7976fb5e63887e5c357011f2bd131 [diff]
diff --git a/Lib/email/_encoded_words.py b/Lib/email/_encoded_words.py
index c40ffa9..295ae7e 100644
--- a/Lib/email/_encoded_words.py
+++ b/Lib/email/_encoded_words.py

@@ -98,30 +98,42 @@
 #
 
 def decode_b(encoded):
-    defects = []
+    # First try decoding with validate=True, fixing the padding if needed.
+    # This will succeed only if encoded includes no invalid characters.
     pad_err = len(encoded) % 4
-    if pad_err:
-        defects.append(errors.InvalidBase64PaddingDefect())
-        padded_encoded = encoded + b'==='[:4-pad_err]
-    else:
-        padded_encoded = encoded
+    missing_padding = b'==='[:4-pad_err] if pad_err else b''
     try:
-        return base64.b64decode(padded_encoded, validate=True), defects
+        return (
+            base64.b64decode(encoded + missing_padding, validate=True),
+            [errors.InvalidBase64PaddingDefect()] if pad_err else [],
+        )
     except binascii.Error:
-        # Since we had correct padding, this must an invalid char error.
-        defects = [errors.InvalidBase64CharactersDefect()]
+        # Since we had correct padding, this is likely an invalid char error.
+        #
         # The non-alphabet characters are ignored as far as padding
-        # goes, but we don't know how many there are.  So we'll just
-        # try various padding lengths until something works.
-        for i in 0, 1, 2, 3:
+        # goes, but we don't know how many there are.  So try without adding
+        # padding to see if it works.
+        try:
+            return (
+                base64.b64decode(encoded, validate=False),
+                [errors.InvalidBase64CharactersDefect()],
+            )
+        except binascii.Error:
+            # Add as much padding as could possibly be necessary (extra padding
+            # is ignored).
             try:
-                return base64.b64decode(encoded+b'='*i, validate=False), defects
+                return (
+                    base64.b64decode(encoded + b'==', validate=False),
+                    [errors.InvalidBase64CharactersDefect(),
+                     errors.InvalidBase64PaddingDefect()],
+                )
             except binascii.Error:
-                if i==0:
-                    defects.append(errors.InvalidBase64PaddingDefect())
-        else:
-            # This should never happen.
-            raise AssertionError("unexpected binascii.Error")
+                # This only happens when the encoded string's length is 1 more
+                # than a multiple of 4, which is invalid.
+                #
+                # bpo-27397: Just return the encoded string since there's no
+                # way to decode.
+                return encoded, [errors.InvalidBase64LengthDefect()]
 
 def encode_b(bstring):
     return base64.b64encode(bstring).decode('ascii')

diff --git a/Lib/email/errors.py b/Lib/email/errors.py
index 791239f..d28a680 100644
--- a/Lib/email/errors.py
+++ b/Lib/email/errors.py

@@ -73,6 +73,9 @@
 class InvalidBase64CharactersDefect(MessageDefect):
     """base64 encoded sequence had characters not in base64 alphabet"""
 
+class InvalidBase64LengthDefect(MessageDefect):
+    """base64 encoded sequence had invalid length (1 mod 4)"""
+
 # These errors are specific to header parsing.
 
 class HeaderDefect(MessageDefect):
commit	7b82281c80d0064559866afe92f19cae5978c841	[log] [tgz]
author	Miss Islington (bot) <31488909+miss-islington@users.noreply.github.com>	Tue Jun 12 06:46:11 2018 -0700
committer	Tal Einat <taleinat+github@gmail.com>	Tue Jun 12 16:46:11 2018 +0300
tree	4f78f92d0667944be27d755cfb6a3272774575f8
parent	66cea5cb1be7976fb5e63887e5c357011f2bd131 [diff]