bpo-27321 Fix email.generator.py to not replace a non-existent header. (GH-18074)


This PR replaces GH-1977. The reason for the replacement is two-fold.

The fix itself is different in that if the CTE header doesn't exist in the original message, it is inserted. This is important because the new CTE could be quoted-printable whereas the original is implicit 8bit.

Also the tests are different. The test_nonascii_as_string_without_cte test in GH-1977 doesn't actually test the issue in that it passes without the fix. The test_nonascii_as_string_without_content_type_and_cte test is improved here, and even though it doesn't fail without the fix, it is included for completeness.

Automerge-Triggered-By: @warsaw
(cherry picked from commit bf838227c35212709dc43b3c3c57f8e1655c1d24)

Co-authored-by: Mark Sapiro <mark@msapiro.net>
diff --git a/Lib/email/generator.py b/Lib/email/generator.py
index ae670c2..c9b1216 100644
--- a/Lib/email/generator.py
+++ b/Lib/email/generator.py
@@ -186,7 +186,11 @@
         # If we munged the cte, copy the message again and re-fix the CTE.
         if munge_cte:
             msg = deepcopy(msg)
-            msg.replace_header('content-transfer-encoding', munge_cte[0])
+            # Preserve the header order if the CTE header already exists.
+            if msg.get('content-transfer-encoding') is None:
+                msg['Content-Transfer-Encoding'] = munge_cte[0]
+            else:
+                msg.replace_header('content-transfer-encoding', munge_cte[0])
             msg.replace_header('content-type', munge_cte[1])
         # Write the headers.  First we see if the message object wants to
         # handle that itself.  If not, we'll do it generically.
diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py
index 59eabb0..ab68cdd 100644
--- a/Lib/test/test_email/test_email.py
+++ b/Lib/test/test_email/test_email.py
@@ -311,6 +311,41 @@
         g.flatten(msg)
         self.assertEqual(fullrepr, s.getvalue())
 
+    def test_nonascii_as_string_without_cte(self):
+        m = textwrap.dedent("""\
+            MIME-Version: 1.0
+            Content-type: text/plain; charset="iso-8859-1"
+
+            Test if non-ascii messages with no Content-Transfer-Encoding set
+            can be as_string'd:
+            Föö bär
+            """)
+        source = m.encode('iso-8859-1')
+        expected = textwrap.dedent("""\
+            MIME-Version: 1.0
+            Content-type: text/plain; charset="iso-8859-1"
+            Content-Transfer-Encoding: quoted-printable
+
+            Test if non-ascii messages with no Content-Transfer-Encoding set
+            can be as_string'd:
+            F=F6=F6 b=E4r
+            """)
+        msg = email.message_from_bytes(source)
+        self.assertEqual(msg.as_string(), expected)
+
+    def test_nonascii_as_string_without_content_type_and_cte(self):
+        m = textwrap.dedent("""\
+            MIME-Version: 1.0
+
+            Test if non-ascii messages with no Content-Type nor
+            Content-Transfer-Encoding set can be as_string'd:
+            Föö bär
+            """)
+        source = m.encode('iso-8859-1')
+        expected = source.decode('ascii', 'replace')
+        msg = email.message_from_bytes(source)
+        self.assertEqual(msg.as_string(), expected)
+
     def test_as_bytes(self):
         msg = self._msgobj('msg_01.txt')
         with openfile('msg_01.txt') as fp: