#1368247: make set_charset/MIMEText automatically encode unicode _payload.
Fixes UnicodeErrors (mysterious to the end user) raised when using utf-8 as
the charset and a unicode object as the _text argument. Also makes the way
in which unicode gets encoded to quoted-printable for other charsets more
sane (previously it only worked by accident). The _payload is now encoded
to charset.output_charset if it is unicode.
diff --git a/Lib/email/message.py b/Lib/email/message.py
index 993a1ac..08423cd 100644
--- a/Lib/email/message.py
+++ b/Lib/email/message.py
@@ -256,6 +256,8 @@
charset=charset.get_output_charset())
else:
self.set_param('charset', charset.get_output_charset())
+ if isinstance(self._payload, unicode):
+ self._payload = self._payload.encode(charset.output_charset)
if str(charset) != charset.get_output_charset():
self._payload = charset.body_encode(self._payload)
if 'Content-Transfer-Encoding' not in self:
diff --git a/Lib/email/test/test_email.py b/Lib/email/test/test_email.py
index bf41be7..7d01079 100644
--- a/Lib/email/test/test_email.py
+++ b/Lib/email/test/test_email.py
@@ -1045,6 +1045,31 @@
eq(msg.get_charset().input_charset, 'us-ascii')
eq(msg['content-type'], 'text/plain; charset="us-ascii"')
+ def test_7bit_unicode_input(self):
+ eq = self.assertEqual
+ msg = MIMEText(u'hello there', _charset='us-ascii')
+ eq(msg.get_charset().input_charset, 'us-ascii')
+ eq(msg['content-type'], 'text/plain; charset="us-ascii"')
+
+ def test_7bit_unicode_input_no_charset(self):
+ eq = self.assertEqual
+ msg = MIMEText(u'hello there')
+ eq(msg.get_charset(), 'us-ascii')
+ eq(msg['content-type'], 'text/plain; charset="us-ascii"')
+ self.assertTrue('hello there' in msg.as_string())
+
+ def test_8bit_unicode_input(self):
+ teststr = u'\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
+ eq = self.assertEqual
+ msg = MIMEText(teststr, _charset='utf-8')
+ eq(msg.get_charset().output_charset, 'utf-8')
+ eq(msg['content-type'], 'text/plain; charset="utf-8"')
+ eq(msg.get_payload(decode=True), teststr.encode('utf-8'))
+
+ def test_8bit_unicode_input_no_charset(self):
+ teststr = u'\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
+ self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
+
# Test complicated multipart/* messages