blob: 51a9438fa1fb0613bc5fab69b9423ab495da6809 [file] [log] [blame]
Benjamin Peterson46a99002010-01-09 18:45:30 +00001# Copyright (C) 2001-2010 Python Software Foundation
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002# Contact: email-sig@python.org
3# email package unit tests
4
R. David Murray719a4492010-11-21 16:53:48 +00005import re
Guido van Rossum8b3febe2007-08-30 01:15:14 +00006import time
7import base64
Guido van Rossum8b3febe2007-08-30 01:15:14 +00008import unittest
R. David Murray96fd54e2010-10-08 15:55:28 +00009import textwrap
Guido van Rossum8b3febe2007-08-30 01:15:14 +000010
R. David Murray96fd54e2010-10-08 15:55:28 +000011from io import StringIO, BytesIO
Guido van Rossum8b3febe2007-08-30 01:15:14 +000012from itertools import chain
13
14import email
R David Murrayc27e5222012-05-25 15:01:48 -040015import email.policy
Guido van Rossum8b3febe2007-08-30 01:15:14 +000016
17from email.charset import Charset
18from email.header import Header, decode_header, make_header
19from email.parser import Parser, HeaderParser
R David Murray638d40b2012-08-24 11:14:13 -040020from email.generator import Generator, DecodedGenerator, BytesGenerator
Guido van Rossum8b3febe2007-08-30 01:15:14 +000021from email.message import Message
22from email.mime.application import MIMEApplication
23from email.mime.audio import MIMEAudio
24from email.mime.text import MIMEText
25from email.mime.image import MIMEImage
26from email.mime.base import MIMEBase
27from email.mime.message import MIMEMessage
28from email.mime.multipart import MIMEMultipart
29from email import utils
30from email import errors
31from email import encoders
32from email import iterators
33from email import base64mime
34from email import quoprimime
35
R David Murray965794e2013-03-07 18:16:47 -050036from test.support import unlink
R David Murraya256bac2011-03-31 12:20:23 -040037from test.test_email import openfile, TestEmailBase
Guido van Rossum8b3febe2007-08-30 01:15:14 +000038
R David Murray612528d2013-03-15 20:38:15 -040039# These imports are documented to work, but we are testing them using a
40# different path, so we import them here just to make sure they are importable.
41from email.parser import FeedParser, BytesFeedParser
42
Guido van Rossum8b3febe2007-08-30 01:15:14 +000043NL = '\n'
44EMPTYSTRING = ''
45SPACE = ' '
46
47
Guido van Rossum8b3febe2007-08-30 01:15:14 +000048# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    """Exercise the public API of ``email.message.Message``.

    Fixture messages are obtained through ``self._msgobj(name)`` (inherited
    from ``TestEmailBase``) or read directly with ``openfile(name)``.
    Test methods deliberately carry comments rather than docstrings so the
    unittest runner keeps reporting them by method name.
    """

    def test_get_all(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        # A missing header yields the caller-supplied default.
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        self.assertIsNone(msg.get_charset())
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        self.assertIsNone(msg.get_charset())
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_set_payload_with_8bit_data_and_charset(self):
        data = b'\xd0\x90\xd0\x91\xd0\x92'
        charset = Charset('utf-8')
        msg = Message()
        msg.set_payload(data, charset)
        self.assertEqual(msg['content-transfer-encoding'], 'base64')
        self.assertEqual(msg.get_payload(decode=True), data)
        self.assertEqual(msg.get_payload(), '0JDQkdCS\n')

    def test_set_payload_with_non_ascii_and_charset_body_encoding_none(self):
        data = b'\xd0\x90\xd0\x91\xd0\x92'
        charset = Charset('utf-8')
        charset.body_encoding = None  # Disable base64 encoding
        msg = Message()
        msg.set_payload(data.decode('utf-8'), charset)
        self.assertEqual(msg['content-transfer-encoding'], '8bit')
        self.assertEqual(msg.get_payload(decode=True), data)

    def test_set_payload_with_8bit_data_and_charset_body_encoding_none(self):
        data = b'\xd0\x90\xd0\x91\xd0\x92'
        charset = Charset('utf-8')
        charset.body_encoding = None  # Disable base64 encoding
        msg = Message()
        msg.set_payload(data, charset)
        self.assertEqual(msg['content-transfer-encoding'], '8bit')
        self.assertEqual(msg.get_payload(decode=True), data)

    def test_set_payload_to_list(self):
        msg = Message()
        msg.set_payload([])
        self.assertEqual(msg.get_payload(), [])

    def test_set_payload_with_non_ascii_and_no_charset_raises(self):
        data = b'\xd0\x90\xd0\x91\xd0\x92'.decode('utf-8')
        msg = Message()
        with self.assertRaises(TypeError):
            msg.set_payload(data)

    def test_get_charsets(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        # Parts without a charset report the supplied failobj instead of None.
        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_make_boundary(self):
        msg = MIMEMultipart('form-data')
        # Note that when the boundary gets created is an implementation
        # detail and might change.
        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
        # Trigger creation of boundary
        msg.as_string()
        # Only the fixed 33-character prefix is compared; the remainder of
        # the generated boundary is not pinned by this test.
        self.assertEqual(msg.items()[0][1][:33],
                         'multipart/form-data; boundary="==')
        # XXX: there ought to be tests of the uniqueness of the boundary, too.

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        with openfile('msg_46.txt') as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, True, 0)
        gen.flatten(msg, False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_byte_message_rfc822_only(self):
        # Make sure new bytes header parser also passes this.
        with openfile('msg_46.txt') as fp:
            msgdata = fp.read().encode('ascii')
        parser = email.parser.BytesHeaderParser()
        msg = parser.parsebytes(msgdata)
        out = BytesIO()
        gen = email.generator.BytesGenerator(out)
        gen.flatten(msg)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        self.assertIsNone(msg.get_payload(decode=True))
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            # Assigning a header appends a new field and lookups return the
            # first match, so the previous value must be removed; otherwise
            # only 'x-uuencode' would ever be exercised by this loop.
            del msg['content-transfer-encoding']
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_get_payload_n_raises_on_non_multipart(self):
        msg = Message()
        self.assertRaises(TypeError, msg.get_payload, 1)

    def test_decoded_generator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        self.assertIn('from', msg)
        self.assertIn('From', msg)
        self.assertIn('FROM', msg)
        self.assertIn('to', msg)
        self.assertIn('To', msg)
        self.assertIn('TO', msg)

    def test_as_string(self):
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        self.assertEqual(text, str(msg))
        # With unixfrom=True the first line is the envelope "From " line and
        # the rest must match the fixture text.
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        self.assertTrue(lines[0].startswith('From '))
        self.assertEqual(text, NL.join(lines[1:]))

    def test_as_string_policy(self):
        msg = self._msgobj('msg_01.txt')
        newpolicy = msg.policy.clone(linesep='\r\n')
        fullrepr = msg.as_string(policy=newpolicy)
        s = StringIO()
        g = Generator(s, policy=newpolicy)
        g.flatten(msg)
        self.assertEqual(fullrepr, s.getvalue())

    def test_as_bytes(self):
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            data = fp.read().encode('ascii')
        self.assertEqual(data, bytes(msg))
        fullrepr = msg.as_bytes(unixfrom=True)
        lines = fullrepr.split(b'\n')
        self.assertTrue(lines[0].startswith(b'From '))
        self.assertEqual(data, b'\n'.join(lines[1:]))

    def test_as_bytes_policy(self):
        msg = self._msgobj('msg_01.txt')
        newpolicy = msg.policy.clone(linesep='\r\n')
        fullrepr = msg.as_bytes(policy=newpolicy)
        s = BytesIO()
        g = BytesGenerator(s, policy=newpolicy)
        g.flatten(msg)
        self.assertEqual(fullrepr, s.getvalue())

    # test_headerregistry.TestContentTypeHeader.bad_params
    def test_bad_param(self):
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_filename())

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_boundary())

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        # No header= argument and no Content-Type present: nothing to parse.
        self.assertIsNone(msg.get_params())
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    # test_headerregistry.TestContentTypeHeader.spaces_around_param_equals
    def test_get_param_liberal(self):
        msg = Message()
        # NOTE(review): the value repeats the field name; presumably this is
        # deliberately sloppy input for the "liberal" parse -- confirm.
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        self.assertIsNone(msg.get_param('quuz', header='x-header'))
        self.assertIsNone(msg.get_param('quuz'))
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    # test_headerregistry.TestContentTypeHeader.spaces_around_semis
    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    # test_headerregistry.TestContentTypeHeader.semis_inside_quotes
    def test_get_param_with_semis_in_quotes(self):
        # The semicolons under test are the ones inside the quoted value.
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    # test_headerregistry.TestContentTypeHeader.quotes_inside_rfc2231_value
    def test_get_param_with_quotes(self):
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        msg = email.message_from_string('Header: exists')
        self.assertIn('header', msg)
        self.assertIn('Header', msg)
        self.assertIn('HEADER', msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        # Re-adding the parameter places it after the surviving ones.
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_del_param_on_nonexistent_header(self):
        msg = Message()
        # Deleting param on empty msg should not raise exception.
        msg.del_param('filename', 'content-disposition')

    def test_del_nonexistent_param(self):
        msg = Message()
        msg.add_header('Content-Type', 'text/plain', charset='utf-8')
        existing_header = msg['Content-Type']
        msg.del_param('foobar', header='Content-Type')
        self.assertEqual(msg['Content-Type'], existing_header)

    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        # A type without a slash is rejected.
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # Changing the type keeps the existing parameters.
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        # A Content-Type value without a slash falls back to text/plain.
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        # With duplicate fields only the first occurrence is replaced.
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    # test_defect_handling:test_invalid_chars_in_base64_payload
    def test_broken_base64_payload(self):
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         (b'\x03\x00\xe9\xd0\xfe\xff\xff.\x8b\xc0'
                          b'\xa1\x00p\xf6\xbf\xe9\x0f'))
        self.assertIsInstance(msg.defects[0],
                              errors.InvalidBase64CharactersDefect)

    def test_questionable_bytes_payload(self):
        # This test improves coverage but is not a compliance test,
        # since it involves poking inside the black box in a way
        # that actually breaks the model invariants.
        x = 'this is a quéstionable thing to do'.encode('utf-8')
        msg = Message()
        msg['content-type'] = 'text/plain; charset="utf-8"'
        msg['content-transfer-encoding'] = '8bit'
        msg._payload = x
        self.assertEqual(msg.get_payload(decode=True), x)

    # Issue 1078919
    def test_ascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    def test_noascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer.ppt")
        self.assertEqual(
            'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
            msg['Content-Disposition'])

    def test_nonascii_add_header_via_triple(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fußballer.ppt'))
        self.assertEqual(
            'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
            msg['Content-Disposition'])

    def test_ascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="windows [filename].ppt")
        self.assertEqual(
            'attachment; filename="windows [filename].ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer [filename].ppt")
        self.assertEqual(
            "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
            msg['Content-Disposition'])

    def test_binary_quopri_payload(self):
        for charset in ('latin-1', 'ascii'):
            msg = Message()
            msg['content-type'] = 'text/plain; charset=%s' % charset
            msg['content-transfer-encoding'] = 'quoted-printable'
            msg.set_payload(b'foo=e6=96=87bar')
            self.assertEqual(
                msg.get_payload(decode=True),
                b'foo\xe6\x96\x87bar',
                'get_payload returns wrong result with charset %s.' % charset)

    def test_binary_base64_payload(self):
        for charset in ('latin-1', 'ascii'):
            msg = Message()
            msg['content-type'] = 'text/plain; charset=%s' % charset
            msg['content-transfer-encoding'] = 'base64'
            msg.set_payload(b'Zm9v5paHYmFy')
            self.assertEqual(
                msg.get_payload(decode=True),
                b'foo\xe6\x96\x87bar',
                'get_payload returns wrong result with charset %s.' % charset)

    def test_binary_uuencode_payload(self):
        for charset in ('latin-1', 'ascii'):
            for encoding in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
                msg = Message()
                msg['content-type'] = 'text/plain; charset=%s' % charset
                msg['content-transfer-encoding'] = encoding
                msg.set_payload(b"begin 666 -\n)9F]OYI:'8F%R\n \nend\n")
                # The failure message is a single concatenated literal; it
                # must not be wrapped in a tuple, or str() would render the
                # tuple's repr instead of a readable sentence.
                self.assertEqual(
                    msg.get_payload(decode=True),
                    b'foo\xe6\x96\x87bar',
                    ('get_payload returns wrong result '
                     'with charset {0} and encoding {1}.').format(
                         charset, encoding))

    def test_add_header_with_name_only_param(self):
        msg = Message()
        # A None value emits the bare parameter name; the underscore in the
        # keyword becomes a dash.
        msg.add_header('Content-Disposition', 'inline', foo_bar=None)
        self.assertEqual("inline; foo-bar", msg['Content-Disposition'])

    def test_add_header_with_no_value(self):
        msg = Message()
        msg.add_header('X-Status', None)
        self.assertEqual('', msg['X-Status'])

    # Issue 5871: reject an attempt to embed a header inside a header value
    # (header injection attack).
    def test_embeded_header_via_Header_rejected(self):
        msg = Message()
        msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_embeded_header_via_string_rejected(self):
        msg = Message()
        msg['Dummy'] = 'dummy\nX-Injected-Header: test'
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_unicode_header_defaults_to_utf8_encoding(self):
        # Issue 14291
        m = MIMEText('abc\n')
        m['Subject'] = 'É test'
        self.assertEqual(str(m), textwrap.dedent("""\
            Content-Type: text/plain; charset="us-ascii"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 7bit
            Subject: =?utf-8?q?=C3=89_test?=

            abc
            """))

    def test_unicode_body_defaults_to_utf8_encoding(self):
        # Issue 14291
        m = MIMEText('É testabc\n')
        self.assertEqual(str(m), textwrap.dedent("""\
            Content-Type: text/plain; charset="utf-8"
            MIME-Version: 1.0
            Content-Transfer-Encoding: base64

            w4kgdGVzdGFiYwo=
            """))
731
732
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000733# Test the email.encoders module
class TestEncoders(unittest.TestCase):
    """Checks on the Content-Transfer-Encoding chosen for message payloads."""

    def test_EncodersEncode_base64(self):
        with openfile('PyBanner048.gif', 'rb') as image_file:
            raw_image = image_file.read()
        image_part = email.mime.image.MIMEImage(raw_image)
        encoded_body = image_part.get_payload()
        # the transfer-encoded body lines should all be <=76 characters
        longest = max(len(line) for line in encoded_body.split('\n'))
        self.assertLessEqual(longest, 76)

    def test_encode_empty_payload(self):
        # Setting a charset on a message without a payload still yields a
        # transfer-encoding header.
        message = Message()
        message.set_charset('us-ascii')
        self.assertEqual(message['content-transfer-encoding'], '7bit')

    def test_default_cte(self):
        header = 'content-transfer-encoding'
        # 7bit data and the default us-ascii _charset
        plain = MIMEText('hello world')
        self.assertEqual(plain[header], '7bit')
        # Similar, but with 8bit data
        eight_bit = MIMEText('hello \xf8 world')
        self.assertEqual(eight_bit[header], 'base64')
        # And now with a different charset
        latin1 = MIMEText('hello \xf8 world', _charset='iso-8859-1')
        self.assertEqual(latin1[header], 'quoted-printable')

    def test_encode7or8bit(self):
        # Make sure a charset whose input character set is 8bit but
        # whose output character set is 7bit gets a transfer-encoding
        # of 7bit.
        message = MIMEText('文', _charset='euc-jp')
        self.assertEqual(message['content-transfer-encoding'], '7bit')

    def test_qp_encode_latin1(self):
        message = MIMEText('\xe1\xf6\n', 'text', 'ISO-8859-1')
        expected = textwrap.dedent("""\
            MIME-Version: 1.0
            Content-Type: text/text; charset="iso-8859-1"
            Content-Transfer-Encoding: quoted-printable

            =E1=F6
            """)
        self.assertEqual(str(message), expected)

    def test_qp_encode_non_latin1(self):
        # Issue 16948
        message = MIMEText('\u017c\n', 'text', 'ISO-8859-2')
        expected = textwrap.dedent("""\
            MIME-Version: 1.0
            Content-Type: text/text; charset="iso-8859-2"
            Content-Transfer-Encoding: quoted-printable

            =BF
            """)
        self.assertEqual(str(message), expected)
791
Ezio Melottib3aedd42010-11-20 19:04:17 +0000792
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000793# Test long header wrapping
class TestLongHeaders(TestEmailBase):
    # Exercises folding of long header values, both directly through
    # Header.encode() and indirectly through Generator.flatten() and
    # Message.as_string().
    #
    # NOTE(review): every expected string in this class is compared character
    # for character, so each space inside the literals matters.  Some literals
    # in this copy look as if runs of spaces were collapsed (for example the
    # one that talks about "two whitespaces in a row") -- confirm them against
    # the repository version before relying on them.

    # Show complete diffs on failure; the compared strings are long.
    maxDiff = None

    def test_split_long_continuation(self):
        # A parsed message whose Subject already contains continuation lines
        # is expected to flatten back to exactly the same text.
        eq = self.ndiffAssertEqual
        msg = email.message_from_string("""\
Subject: bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text

test
""")
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), """\
Subject: bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text

test
""")

    def test_another_long_almost_unsplittable_header(self):
        # The overlong middle line has no split point, so encode() is
        # expected to return the value unchanged, for both flavours of
        # leading whitespace.
        eq = self.ndiffAssertEqual
        hstr = """\
bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text"""
        h = Header(hstr, continuation_ws='\t')
        eq(h.encode(), """\
bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text""")
        # NOTE(review): the replacement whitespace and the leading whitespace
        # of the expected lines may have been wider in the repository copy.
        h = Header(hstr.replace('\t', ' '))
        eq(h.encode(), """\
bug demonstration
 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
 more text""")

    def test_long_nonstring(self):
        # One header built from three chunks, each with its own charset;
        # checked once through a Generator and once through encode().
        eq = self.ndiffAssertEqual
        g = Charset("iso-8859-1")
        cz = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
                  b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
                  b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
                  b'bef\xf6rdert. ')
        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        h = Header(g_head, g, header_name='Subject')
        h.append(cz_head, cz)
        h.append(utf8_head, utf8)
        msg = Message()
        msg['Subject'] = h
        sfp = StringIO()
        # From here on ``g`` is the Generator, no longer the Charset.
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), """\
Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
 =?utf-8?b?44CC?=

""")
        eq(h.encode(maxlinelen=76), """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")

    def test_long_header_encode(self):
        # The value is expected to fold after the first "; ".
        eq = self.ndiffAssertEqual
        h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
                   'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
                   header_name='X-Foobar-Spoink-Defrobnit')
        eq(h.encode(), '''\
wasnipoop; giraffes="very-long-necked-animals";
 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')

    def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
        # continuation_ws='\t' is passed, yet the expected folded line still
        # starts with the space that was present in the value.
        eq = self.ndiffAssertEqual
        h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
                   'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
                   header_name='X-Foobar-Spoink-Defrobnit',
                   continuation_ws='\t')
        eq(h.encode(), '''\
wasnipoop; giraffes="very-long-necked-animals";
 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')

    def test_long_header_encode_with_tab_continuation(self):
        # Here the value itself contains a tab at the fold point, and the
        # expected continuation line starts with that tab.
        eq = self.ndiffAssertEqual
        h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
                   'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
                   header_name='X-Foobar-Spoink-Defrobnit',
                   continuation_ws='\t')
        eq(h.encode(), '''\
wasnipoop; giraffes="very-long-necked-animals";
\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')

    def test_header_encode_with_different_output_charset(self):
        # The header is created with euc-jp; the expected encoded word is
        # labelled iso-2022-jp.
        h = Header('文', 'euc-jp')
        self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=")

    def test_long_header_encode_with_different_output_charset(self):
        # Same charset pairing as above, with a value long enough to need
        # two encoded words.
        h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4'
            b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4'
            b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4'
            b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp')
        res = """\
=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?=
 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?="""
        self.assertEqual(h.encode(), res)

    def test_header_splitter(self):
        # A long header assigned as a plain string, folded when the message
        # is flattened.
        eq = self.ndiffAssertEqual
        msg = MIMEText('')
        # It'd be great if we could use add_header() here, but that doesn't
        # guarantee an order of the parameters.
        msg['X-Foobar-Spoink-Defrobnit'] = (
            'wasnipoop; giraffes="very-long-necked-animals"; '
            'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), '''\
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"

''')

    def test_no_semis_header_splitter(self):
        # A space-separated value without semicolons is expected to fold at
        # a space.
        eq = self.ndiffAssertEqual
        msg = Message()
        msg['From'] = 'test@dom.ain'
        msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
        msg.set_payload('Test')
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), """\
From: test@dom.ain
References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>

Test""")

    def test_last_split_chunk_does_not_fit(self):
        # The unsplittable tail is expected on a line of its own.
        eq = self.ndiffAssertEqual
        h = Header('Subject: the first part of this is short, but_the_second'
            '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
            '_all_by_itself')
        eq(h.encode(), """\
Subject: the first part of this is short,
 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")

    def test_splittable_leading_char_followed_by_overlong_unsplitable(self):
        # Variant where the only text before the overlong part is a comma.
        eq = self.ndiffAssertEqual
        h = Header(', but_the_second'
            '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
            '_all_by_itself')
        eq(h.encode(), """\
,
 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")

    def test_multiple_splittable_leading_char_followed_by_overlong_unsplitable(self):
        # Variant with two leading commas before the overlong part.
        eq = self.ndiffAssertEqual
        h = Header(', , but_the_second'
            '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
            '_all_by_itself')
        eq(h.encode(), """\
, ,
 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")

    def test_trailing_splitable_on_overlong_unsplitable(self):
        # A trailing ';' with nothing after it: the value is expected back
        # unchanged on a single line.
        eq = self.ndiffAssertEqual
        h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
            'be_on_a_line_all_by_itself;')
        eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_should_"
            "be_on_a_line_all_by_itself;")

    def test_trailing_splitable_on_overlong_unsplitable_with_leading_splitable(self):
        # Leading '; ' and trailing '; ' around the overlong part; the
        # expected output keeps the trailing space.
        eq = self.ndiffAssertEqual
        h = Header('; '
            'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
            'be_on_a_line_all_by_itself; ')
        eq(h.encode(), """\
;
 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)

    def test_long_header_with_multiple_sequential_split_chars(self):
        # NOTE(review): the text says "two whitespaces in a row" but this
        # copy shows single spaces; the literals probably contained runs of
        # spaces -- confirm against the repository version.
        eq = self.ndiffAssertEqual
        h = Header('This is a long line that has two whitespaces in a row. '
            'This used to cause truncation of the header when folded')
        eq(h.encode(), """\
This is a long line that has two whitespaces in a row. This used to cause
 truncation of the header when folded""")

    def test_splitter_split_on_punctuation_only_if_fws_with_header(self):
        # Punctuation that is not followed by whitespace is not used as a
        # fold point: the expected value is the input on one line.
        eq = self.ndiffAssertEqual
        h = Header('thisverylongheaderhas;semicolons;and,commas,but'
            'they;arenotlegal;fold,points')
        eq(h.encode(), "thisverylongheaderhas;semicolons;and,commas,butthey;"
                        "arenotlegal;fold,points")

    def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self):
        # Several folds, ending with an overlong part preceded by ';; '.
        eq = self.ndiffAssertEqual
        h = Header('this is a test where we need to have more than one line '
            'before; our final line that is just too big to fit;; '
            'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
            'be_on_a_line_all_by_itself;')
        eq(h.encode(), """\
this is a test where we need to have more than one line before;
 our final line that is just too big to fit;;
 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""")

    def test_overlong_last_part_followed_by_split_point(self):
        # An overlong value ending in a space: expected back unchanged,
        # trailing space included.
        eq = self.ndiffAssertEqual
        h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
            'be_on_a_line_all_by_itself ')
        eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_"
                        "should_be_on_a_line_all_by_itself ")

    def test_multiline_with_overlong_parts_separated_by_two_split_points(self):
        # Two overlong parts separated by '; ; '; the lone ';' is expected
        # on its own continuation line.
        eq = self.ndiffAssertEqual
        h = Header('this_is_a__test_where_we_need_to_have_more_than_one_line_'
            'before_our_final_line_; ; '
            'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
            'be_on_a_line_all_by_itself; ')
        eq(h.encode(), """\
this_is_a__test_where_we_need_to_have_more_than_one_line_before_our_final_line_;
 ;
 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)

    def test_multiline_with_overlong_last_part_followed_by_split_point(self):
        # Like the previous test, but the first part is made of ordinary
        # space-separated words.
        eq = self.ndiffAssertEqual
        h = Header('this is a test where we need to have more than one line '
            'before our final line; ; '
            'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
            'be_on_a_line_all_by_itself; ')
        eq(h.encode(), """\
this is a test where we need to have more than one line before our final line;
 ;
 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)

    def test_long_header_with_whitespace_runs(self):
        # NOTE(review): the name refers to whitespace runs and the expected
        # text ends in two escaped spaces, but the literals here show single
        # spaces; they were probably wider in the repository copy -- confirm.
        eq = self.ndiffAssertEqual
        msg = Message()
        msg['From'] = 'test@dom.ain'
        msg['References'] = SPACE.join(['<foo@dom.ain> '] * 10)
        msg.set_payload('Test')
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), """\
From: test@dom.ain
References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
 <foo@dom.ain> <foo@dom.ain>\x20\x20

Test""")

    def test_long_run_with_semi_header_splitter(self):
        # A long space-separated run followed by '; abc'.
        eq = self.ndiffAssertEqual
        msg = Message()
        msg['From'] = 'test@dom.ain'
        msg['References'] = SPACE.join(['<foo@dom.ain>'] * 10) + '; abc'
        msg.set_payload('Test')
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), """\
From: test@dom.ain
References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
 <foo@dom.ain>; abc

Test""")

    def test_splitter_split_on_punctuation_only_if_fws(self):
        # Message-level counterpart of the "..._with_header" test above: the
        # value has punctuation but no whitespace to fold at.
        eq = self.ndiffAssertEqual
        msg = Message()
        msg['From'] = 'test@dom.ain'
        msg['References'] = ('thisverylongheaderhas;semicolons;and,commas,but'
            'they;arenotlegal;fold,points')
        msg.set_payload('Test')
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        # XXX the space after the header should not be there.
        eq(sfp.getvalue(), """\
From: test@dom.ain
References:\x20
 thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points

Test""")

    def test_no_split_long_header(self):
        # An 80-character run of 'x' cannot be split.
        eq = self.ndiffAssertEqual
        hstr = 'References: ' + 'x' * 80
        h = Header(hstr)
        # These come on two lines because Headers are really field value
        # classes and don't really know about their field names.
        eq(h.encode(), """\
References:
 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
        h = Header('x' * 80)
        eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')

    def test_splitting_multiple_long_lines(self):
        # Three long lines, the second and third starting with a tab; each
        # is expected to fold after its "; " separators.
        eq = self.ndiffAssertEqual
        hstr = """\
from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
"""
        h = Header(hstr, continuation_ws='\t')
        eq(h.encode(), """\
from babylon.socal-raves.org (localhost [127.0.0.1]);
 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
 for <mailman-admin@babylon.socal-raves.org>;
 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
 for <mailman-admin@babylon.socal-raves.org>;
 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
 for <mailman-admin@babylon.socal-raves.org>;
 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")

    def test_splitting_first_line_only_is_long(self):
        # Only the first line is too long; the later tab-led lines are
        # expected to stay as they are.
        eq = self.ndiffAssertEqual
        hstr = """\
from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
\tid 17k4h5-00034i-00
\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
        h = Header(hstr, maxlinelen=78, header_name='Received',
                   continuation_ws='\t')
        eq(h.encode(), """\
from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
 helo=cthulhu.gerg.ca)
\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
\tid 17k4h5-00034i-00
\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")

    def test_long_8bit_header(self):
        # An iso-8859-1 header built in two chunks; checked via encode(),
        # via as_string() with a limit, and via as_string() with
        # maxheaderlen=0, where the expected output is a single line.
        eq = self.ndiffAssertEqual
        msg = Message()
        h = Header('Britische Regierung gibt', 'iso-8859-1',
                    header_name='Subject')
        h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
        eq(h.encode(maxlinelen=76), """\
=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
        msg['Subject'] = h
        eq(msg.as_string(maxheaderlen=76), """\
Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
 =?iso-8859-1?q?hore-Windkraftprojekte?=

""")
        eq(msg.as_string(maxheaderlen=0), """\
Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=

""")

    def test_long_8bit_header_no_charset(self):
        # A non-ASCII value with no charset given, first as a plain string
        # and then wrapped in Header; both are expected to yield the same
        # utf-8 encoded words.
        eq = self.ndiffAssertEqual
        msg = Message()
        header_string = ('Britische Regierung gibt gr\xfcnes Licht '
                         'f\xfcr Offshore-Windkraftprojekte '
                         '<a-very-long-address@example.com>')
        msg['Reply-To'] = header_string
        eq(msg.as_string(maxheaderlen=78), """\
Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=

""")
        msg = Message()
        msg['Reply-To'] = Header(header_string,
                                 header_name='Reply-To')
        eq(msg.as_string(maxheaderlen=78), """\
Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=

""")

    def test_long_to_header(self):
        # An address list; the first comma has no following space and the
        # expected output does not fold there.
        eq = self.ndiffAssertEqual
        to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
              '<someone@eecs.umich.edu>, '
              '"Someone Test #B" <someone@umich.edu>, '
              '"Someone Test #C" <someone@eecs.umich.edu>, '
              '"Someone Test #D" <someone@eecs.umich.edu>')
        msg = Message()
        msg['To'] = to
        eq(msg.as_string(maxheaderlen=78), '''\
To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
 "Someone Test #B" <someone@umich.edu>,
 "Someone Test #C" <someone@eecs.umich.edu>,
 "Someone Test #D" <someone@eecs.umich.edu>

''')

    def test_long_line_after_append(self):
        # The appended chunk is expected on a continuation line.
        eq = self.ndiffAssertEqual
        s = 'This is an example of string which has almost the limit of header length.'
        h = Header(s)
        h.append('Add another line.')
        eq(h.encode(maxlinelen=76), """\
This is an example of string which has almost the limit of header length.
 Add another line.""")

    def test_shorter_line_with_append(self):
        # Both chunks fit on one line and are expected joined by a space.
        eq = self.ndiffAssertEqual
        s = 'This is a shorter line.'
        h = Header(s)
        h.append('Add another sentence. (Surprise?)')
        eq(h.encode(),
           'This is a shorter line. Add another sentence. (Surprise?)')

    def test_long_field_name(self):
        # header_name is long, and the expected first encoded word is much
        # shorter than the ones that follow.
        eq = self.ndiffAssertEqual
        fn = 'X-Very-Very-Very-Long-Header-Name'
        gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
              'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
              'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
              'bef\xf6rdert. ')
        h = Header(gs, 'iso-8859-1', header_name=fn)
        # BAW: this seems broken because the first line is too long
        eq(h.encode(maxlinelen=76), """\
=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")

    def test_long_received_header(self):
        # The same value is stored once as a Header with continuation_ws
        # and once as a plain string; both are expected to fold identically.
        h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
             'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
             'Wed, 05 Mar 2003 18:10:18 -0700')
        msg = Message()
        msg['Received-1'] = Header(h, continuation_ws='\t')
        msg['Received-2'] = h
        # This should be splitting on spaces not semicolons.
        self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
 Wed, 05 Mar 2003 18:10:18 -0700
Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
 Wed, 05 Mar 2003 18:10:18 -0700

""")

    def test_string_headerinst_eq(self):
        # A Header instance and a plain string with the same value are
        # expected to serialize the same way.
        h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
             'tu-muenchen.de> (David Bremner\'s message of '
             '"Thu, 6 Mar 2003 13:58:21 +0100")')
        msg = Message()
        msg['Received-1'] = Header(h, header_name='Received-1',
                                   continuation_ws='\t')
        msg['Received-2'] = h
        # XXX The space after the ':' should not be there.
        self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Received-1:\x20
 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
Received-2:\x20
 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")

""")

    def test_long_unbreakable_lines_with_continuation(self):
        # The same two-line value is stored three ways: as a string, as a
        # Header, and as a string with a leading space.
        eq = self.ndiffAssertEqual
        msg = Message()
        t = """\
iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
        msg['Face-1'] = t
        msg['Face-2'] = Header(t, header_name='Face-2')
        msg['Face-3'] = ' ' + t
        # XXX This splitting is all wrong.  The first value line should be
        # snug against the field name, or the space after the header should
        # not be there.
        eq(msg.as_string(maxheaderlen=78), """\
Face-1:\x20
 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Face-2:\x20
 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Face-3:\x20
 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp

""")

    def test_another_long_multiline_header(self):
        # A Received header parsed from one long line and re-serialized with
        # a 78-character limit.
        eq = self.ndiffAssertEqual
        m = ('Received: from siimage.com '
             '([172.25.1.3]) by zima.siliconimage.com with '
             'Microsoft SMTPSVC(5.0.2195.4905); '
             'Wed, 16 Oct 2002 07:41:11 -0700')
        msg = email.message_from_string(m)
        eq(msg.as_string(maxheaderlen=78), '''\
Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with
 Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 2002 07:41:11 -0700

''')

    def test_long_lines_with_different_header(self):
        # The value is added twice under the name 'List', once as a string
        # and once as a Header; the expected output contains both copies.
        eq = self.ndiffAssertEqual
        h = ('List-Unsubscribe: '
             '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
             ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
             '?subject=unsubscribe>')
        msg = Message()
        msg['List'] = h
        msg['List'] = Header(h, header_name='List')
        eq(msg.as_string(maxheaderlen=78), """\
List: List-Unsubscribe:
 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
List: List-Unsubscribe:
 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>

""")

    def test_long_rfc2047_header_with_embedded_fws(self):
        # NOTE(review): both literals go through textwrap.dedent, so the
        # indentation of their lines relative to each other is part of the
        # data, and this copy did not preserve it.  The layout below (extra
        # indentation on the third input line, one leading space on each
        # expected continuation line) is a reconstruction -- confirm against
        # the repository version.
        h = Header(textwrap.dedent("""\
            We're going to pretend this header is in a non-ascii character set
            \tto see if line wrapping with encoded words and embedded
               folding white space works"""),
                   charset='utf-8',
                   header_name='Test')
        self.assertEqual(h.encode()+'\n', textwrap.dedent("""\
            =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
             =?utf-8?q?cter_set?=
             =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
             =?utf-8?q?_folding_white_space_works?=""")+'\n')
1363
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001364
Ezio Melottib3aedd42010-11-20 19:04:17 +00001365
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001366# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    """Checks how generators treat body lines that start with "From "."""

    def setUp(self):
        body = """\
From the desk of A.A.A.:
Blah blah blah
"""
        self.msg = Message()
        self.msg['From'] = 'aaa@bbb.org'
        self.msg.set_payload(body)

    def test_mangled_from(self):
        # With mangling on, the body line gains a leading '>'.
        out = StringIO()
        Generator(out, mangle_from_=True).flatten(self.msg)
        expected = """\
From: aaa@bbb.org

>From the desk of A.A.A.:
Blah blah blah
"""
        self.assertEqual(out.getvalue(), expected)

    def test_dont_mangle_from(self):
        # With mangling off, the body is written unchanged.
        out = StringIO()
        Generator(out, mangle_from_=False).flatten(self.msg)
        expected = """\
From: aaa@bbb.org

From the desk of A.A.A.:
Blah blah blah
"""
        self.assertEqual(out.getvalue(), expected)

    def test_mangle_from_in_preamble_and_epilog(self):
        # The multipart text before the first boundary and after the last
        # one each contain a "From " line; both should be mangled.
        text = textwrap.dedent("""\
            From: foo@bar.com
            Mime-Version: 1.0
            Content-Type: multipart/mixed; boundary=XXX

            From somewhere unknown

            --XXX
            Content-Type: text/plain

            foo

            --XXX--

            From somewhere unknowable
            """)
        out = StringIO()
        writer = Generator(out, mangle_from_=True)
        writer.flatten(email.message_from_string(text))
        mangled = sum(1 for line in out.getvalue().split('\n')
                      if line.startswith('>From '))
        self.assertEqual(mangled, 2)

    def test_mangled_from_with_bad_bytes(self):
        # The body line holds raw non-ASCII bytes; BytesGenerator should
        # still add the '>' and leave the bytes as they are.
        headers = textwrap.dedent("""\
            Content-Type: text/plain; charset="utf-8"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 8bit
            From: aaa@bbb.org

            """).encode('utf-8')
        parsed = email.message_from_bytes(headers + b'From R\xc3\xb6lli\n')
        out = BytesIO()
        BytesGenerator(out, mangle_from_=True).flatten(parsed)
        self.assertEqual(out.getvalue(), headers + b'>From R\xc3\xb6lli\n')
1434
Ezio Melottib3aedd42010-11-20 19:04:17 +00001435
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001436# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    # Basic behaviour of MIMEAudio built from the audiotest.au fixture.

    def setUp(self):
        # Keep the raw fixture bytes for comparison and wrap them without
        # naming a subtype.
        with openfile('audiotest.au', 'rb') as audio_file:
            raw = audio_file.read()
        self._audiodata = raw
        self._au = MIMEAudio(raw)

    def test_guess_minor_type(self):
        # No subtype was passed in setUp, so it has to be derived from the
        # data itself.
        content_type = self._au.get_content_type()
        self.assertEqual(content_type, 'audio/basic')

    def test_encoding(self):
        # The stored payload is base64 text; decoding it must give back the
        # original bytes.
        encoded = self._au.get_payload().encode('ascii')
        self.assertEqual(base64.decodebytes(encoded), self._audiodata)

    def test_checkSetMinor(self):
        # An explicit subtype is used as given.
        fishy = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(fishy.get_content_type(), 'audio/fish')

    def test_add_header(self):
        audio = self._au
        header = 'content-disposition'
        audio.add_header('Content-Disposition', 'attachment',
                         filename='audiotest.au')
        self.assertEqual(audio[header],
                         'attachment; filename="audiotest.au"')
        self.assertEqual(audio.get_params(header=header),
                         [('attachment', ''), ('filename', 'audiotest.au')])
        self.assertEqual(audio.get_param('filename', header=header),
                         'audiotest.au')
        self.assertEqual(audio.get_param('attachment', header=header), '')
        # A fresh list serves as a sentinel so identity can be checked.
        sentinel = []
        self.assertIs(
            audio.get_param('foo', failobj=sentinel, header=header),
            sentinel)
        # Try some missing stuff
        self.assertIs(audio.get_param('foobar', sentinel), sentinel)
        self.assertIs(
            audio.get_param('attachment', sentinel, header='foobar'),
            sentinel)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001473
1474
Ezio Melottib3aedd42010-11-20 19:04:17 +00001475
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001476# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    # Basic behaviour of MIMEImage built from the PyBanner048.gif fixture.

    def setUp(self):
        # Keep the raw fixture bytes for comparison and wrap them without
        # naming a subtype.
        with openfile('PyBanner048.gif', 'rb') as image_file:
            raw = image_file.read()
        self._imgdata = raw
        self._im = MIMEImage(raw)

    def test_guess_minor_type(self):
        # No subtype was passed in setUp, so it has to be derived from the
        # data itself.
        content_type = self._im.get_content_type()
        self.assertEqual(content_type, 'image/gif')

    def test_encoding(self):
        # The stored payload is base64 text; decoding it must give back the
        # original bytes.
        encoded = self._im.get_payload().encode('ascii')
        self.assertEqual(base64.decodebytes(encoded), self._imgdata)

    def test_checkSetMinor(self):
        # An explicit subtype is used as given.
        fishy = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(fishy.get_content_type(), 'image/fish')

    def test_add_header(self):
        image = self._im
        header = 'content-disposition'
        image.add_header('Content-Disposition', 'attachment',
                         filename='dingusfish.gif')
        self.assertEqual(image[header],
                         'attachment; filename="dingusfish.gif"')
        self.assertEqual(image.get_params(header=header),
                         [('attachment', ''), ('filename', 'dingusfish.gif')])
        self.assertEqual(image.get_param('filename', header=header),
                         'dingusfish.gif')
        self.assertEqual(image.get_param('attachment', header=header), '')
        # A fresh list serves as a sentinel so identity can be checked.
        sentinel = []
        self.assertIs(
            image.get_param('foo', failobj=sentinel, header=header),
            sentinel)
        # Try some missing stuff
        self.assertIs(image.get_param('foobar', sentinel), sentinel)
        self.assertIs(
            image.get_param('attachment', sentinel, header='foobar'),
            sentinel)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001513
1514
Ezio Melottib3aedd42010-11-20 19:04:17 +00001515
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001516# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    # Covers MIMEApplication with its default encoder and with each of the
    # encoders from email.encoders, including a flatten/parse round trip
    # through BytesGenerator.

    def _roundtrip(self, msg):
        # Flatten msg to bytes and parse those bytes into a new message.
        wire = BytesIO()
        BytesGenerator(wire).flatten(msg)
        return email.message_from_bytes(wire.getvalue())

    def test_headers(self):
        msg = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        self.assertEqual(msg.get_content_type(), 'application/octet-stream')
        self.assertEqual(msg['content-transfer-encoding'], 'base64')

    def test_body(self):
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata)
        # whitespace in the cte encoded block is RFC-irrelevant.
        self.assertEqual(msg.get_payload().strip(), '+vv8/f7/')
        self.assertEqual(msg.get_payload(decode=True), bytesdata)

    def test_binary_body_with_encode_7or8bit(self):
        # Issue 17171.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_7or8bit)
        # Treated as a string, this will be invalid code points.
        undecoded = '\uFFFD' * len(bytesdata)
        self.assertEqual(msg.get_payload(), undecoded)
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        self.assertEqual(msg['Content-Transfer-Encoding'], '8bit')
        msg2 = self._roundtrip(msg)
        # Check the re-parsed message here (the original test re-checked
        # msg, leaving msg2's undecoded payload unverified).
        self.assertEqual(msg2.get_payload(), undecoded)
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
        self.assertEqual(msg2['Content-Transfer-Encoding'], '8bit')

    def test_binary_body_with_encode_noop(self):
        # Issue 16564: This does not produce an RFC valid message, since to be
        # valid it should have a CTE of binary.  But the below works in
        # Python2, and is documented as working this way.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_noop)
        # Treated as a string, this will be invalid code points.
        undecoded = '\uFFFD' * len(bytesdata)
        self.assertEqual(msg.get_payload(), undecoded)
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        msg2 = self._roundtrip(msg)
        # Check the re-parsed message, not msg a second time.
        self.assertEqual(msg2.get_payload(), undecoded)
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)

    def test_binary_body_with_encode_quopri(self):
        # Issue 14360.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff '
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_quopri)
        encoded = '=FA=FB=FC=FD=FE=FF=20'
        self.assertEqual(msg.get_payload(), encoded)
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        self.assertEqual(msg['Content-Transfer-Encoding'], 'quoted-printable')
        msg2 = self._roundtrip(msg)
        # Check the re-parsed message, not msg a second time.
        self.assertEqual(msg2.get_payload(), encoded)
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
        self.assertEqual(msg2['Content-Transfer-Encoding'], 'quoted-printable')

    def test_binary_body_with_encode_base64(self):
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_base64)
        encoded = '+vv8/f7/\n'
        self.assertEqual(msg.get_payload(), encoded)
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        msg2 = self._roundtrip(msg)
        # Check the re-parsed message, not msg a second time.
        self.assertEqual(msg2.get_payload(), encoded)
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
1594
Ezio Melottib3aedd42010-11-20 19:04:17 +00001595
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001596# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    # Basic behaviour of MIMEText: content type, charset handling and
    # payload access.

    def setUp(self):
        self._msg = MIMEText('hello there')

    def test_types(self):
        text = self._msg
        self.assertEqual(text.get_content_type(), 'text/plain')
        self.assertEqual(text.get_param('charset'), 'us-ascii')
        # A fresh list serves as a sentinel so identity can be checked.
        sentinel = []
        self.assertIs(text.get_param('foobar', sentinel), sentinel)
        self.assertIs(text.get_param('charset', sentinel, header='foobar'),
                      sentinel)

    def test_payload(self):
        text = self._msg
        self.assertEqual(text.get_payload(), 'hello there')
        self.assertFalse(text.is_multipart())

    def test_charset(self):
        text = MIMEText('hello there', _charset='us-ascii')
        self.assertEqual(text.get_charset().input_charset, 'us-ascii')
        self.assertEqual(text['content-type'],
                         'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        text = MIMEText('hello there', _charset='us-ascii')
        self.assertEqual(text.get_charset().input_charset, 'us-ascii')
        self.assertEqual(text['content-type'],
                         'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        # Without an explicit charset, ASCII-only text ends up as us-ascii.
        text = MIMEText('hello there')
        self.assertEqual(text.get_charset(), 'us-ascii')
        self.assertEqual(text['content-type'],
                         'text/plain; charset="us-ascii"')
        self.assertIn('hello there', text.as_string())

    def test_utf8_input(self):
        cyrillic = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        text = MIMEText(cyrillic, _charset='utf-8')
        self.assertEqual(text.get_charset().output_charset, 'utf-8')
        self.assertEqual(text['content-type'], 'text/plain; charset="utf-8"')
        self.assertEqual(text.get_payload(decode=True),
                         cyrillic.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
                   "will fix in email6")
    def test_utf8_input_no_charset(self):
        cyrillic = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        with self.assertRaises(UnicodeEncodeError):
            MIMEText(cyrillic)
1646
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001647
Ezio Melottib3aedd42010-11-20 19:04:17 +00001648
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001649# Test complicated multipart/* messages
# Exercises multipart/* messages: building them from MIMEBase / MIMEText /
# MIMEImage, flattening them with as_string(), and parsing fixture files and
# inline text.
# NOTE(review): this view collapses leading whitespace, so the indentation
# inside the expected-output literals below (notably the _structure dumps)
# cannot be trusted as shown; confirm against upstream before editing them.
1650class TestMultipart(TestEmailBase):
# Builds a multipart/mixed container (boundary 'BOUNDARY') holding a text
# intro and a GIF attachment, adds From/To/Subject/Date headers, and stores
# the container, image and intro on self for the tests.
1651 def setUp(self):
1652 with openfile('PyBanner048.gif', 'rb') as fp:
1653 data = fp.read()
1654 container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
1655 image = MIMEImage(data, name='dingusfish.gif')
1656 image.add_header('content-disposition', 'attachment',
1657 filename='dingusfish.gif')
1658 intro = MIMEText('''\
1659Hi there,
1660
1661This is the dingus fish.
1662''')
1663 container.attach(intro)
1664 container.attach(image)
1665 container['From'] = 'Barry <barry@digicool.com>'
1666 container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
1667 container['Subject'] = 'Here is your dingus fish'
1668
# Hand-build a Date header for a fixed timestamp in local time.
# NOTE(review): sign is '+' exactly when tzsecs <= 0, yet the raw (then
# non-positive) tzsecs / 36 is what gets formatted, so a negative tzsecs
# would contribute its own '-' as well; '/' is also true division here.
# Confirm whether abs() / integer division was intended.
1669 now = 987809702.54848599
1670 timetuple = time.localtime(now)
1671 if timetuple[-1] == 0:
1672 tzsecs = time.timezone
1673 else:
1674 tzsecs = time.altzone
1675 if tzsecs > 0:
1676 sign = '-'
1677 else:
1678 sign = '+'
1679 tzoffset = ' %s%04d' % (sign, tzsecs / 36)
1680 container['Date'] = time.strftime(
1681 '%a, %d %b %Y %H:%M:%S',
1682 time.localtime(now)) + tzoffset
1683 self._msg = container
1684 self._im = image
1685 self._txt = intro
1686
# The container must report exactly two parts, in attach order, and neither
# part may itself be multipart; indexing past the end raises IndexError.
1687 def test_hierarchy(self):
1688 # convenience
1689 eq = self.assertEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001690 raises = self.assertRaises
1691 # tests
1692 m = self._msg
Serhiy Storchaka328cf3c2013-11-16 12:56:23 +02001693 self.assertTrue(m.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001694 eq(m.get_content_type(), 'multipart/mixed')
1695 eq(len(m.get_payload()), 2)
1696 raises(IndexError, m.get_payload, 2)
1697 m0 = m.get_payload(0)
1698 m1 = m.get_payload(1)
Serhiy Storchaka328cf3c2013-11-16 12:56:23 +02001699 self.assertIs(m0, self._txt)
1700 self.assertIs(m1, self._im)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001701 eq(m.get_payload(), [m0, m1])
Serhiy Storchaka328cf3c2013-11-16 12:56:23 +02001702 self.assertFalse(m0.is_multipart())
1703 self.assertFalse(m1.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001704
# Parsing this text and flattening the result again must reproduce the
# input exactly.
1705 def test_empty_multipart_idempotent(self):
1706 text = """\
1707Content-Type: multipart/mixed; boundary="BOUNDARY"
1708MIME-Version: 1.0
1709Subject: A subject
1710To: aperson@dom.ain
1711From: bperson@dom.ain
1712
1713
1714--BOUNDARY
1715
1716
1717--BOUNDARY--
1718"""
1719 msg = Parser().parsestr(text)
1720 self.ndiffAssertEqual(text, msg.as_string())
1721
# No parts attached and preamble/epilogue never assigned: the expected
# output ends right after the closing boundary, with no trailing newline.
1722 def test_no_parts_in_a_multipart_with_none_epilogue(self):
1723 outer = MIMEBase('multipart', 'mixed')
1724 outer['Subject'] = 'A subject'
1725 outer['To'] = 'aperson@dom.ain'
1726 outer['From'] = 'bperson@dom.ain'
1727 outer.set_boundary('BOUNDARY')
1728 self.ndiffAssertEqual(outer.as_string(), '''\
1729Content-Type: multipart/mixed; boundary="BOUNDARY"
1730MIME-Version: 1.0
1731Subject: A subject
1732To: aperson@dom.ain
1733From: bperson@dom.ain
1734
1735--BOUNDARY
1736
1737--BOUNDARY--''')
1738
# Same as above but with preamble and epilogue set to '': the expected
# output gains an extra blank line before the first boundary and a newline
# after the closing boundary.
1739 def test_no_parts_in_a_multipart_with_empty_epilogue(self):
1740 outer = MIMEBase('multipart', 'mixed')
1741 outer['Subject'] = 'A subject'
1742 outer['To'] = 'aperson@dom.ain'
1743 outer['From'] = 'bperson@dom.ain'
1744 outer.preamble = ''
1745 outer.epilogue = ''
1746 outer.set_boundary('BOUNDARY')
1747 self.ndiffAssertEqual(outer.as_string(), '''\
1748Content-Type: multipart/mixed; boundary="BOUNDARY"
1749MIME-Version: 1.0
1750Subject: A subject
1751To: aperson@dom.ain
1752From: bperson@dom.ain
1753
1754
1755--BOUNDARY
1756
1757--BOUNDARY--
1758''')
1759
# A single attached MIMEText part, preamble/epilogue never assigned.
1760 def test_one_part_in_a_multipart(self):
1761 eq = self.ndiffAssertEqual
1762 outer = MIMEBase('multipart', 'mixed')
1763 outer['Subject'] = 'A subject'
1764 outer['To'] = 'aperson@dom.ain'
1765 outer['From'] = 'bperson@dom.ain'
1766 outer.set_boundary('BOUNDARY')
1767 msg = MIMEText('hello world')
1768 outer.attach(msg)
1769 eq(outer.as_string(), '''\
1770Content-Type: multipart/mixed; boundary="BOUNDARY"
1771MIME-Version: 1.0
1772Subject: A subject
1773To: aperson@dom.ain
1774From: bperson@dom.ain
1775
1776--BOUNDARY
1777Content-Type: text/plain; charset="us-ascii"
1778MIME-Version: 1.0
1779Content-Transfer-Encoding: 7bit
1780
1781hello world
1782--BOUNDARY--''')
1783
# preamble = '': the expected output has an extra blank line before the
# first boundary.
1784 def test_seq_parts_in_a_multipart_with_empty_preamble(self):
1785 eq = self.ndiffAssertEqual
1786 outer = MIMEBase('multipart', 'mixed')
1787 outer['Subject'] = 'A subject'
1788 outer['To'] = 'aperson@dom.ain'
1789 outer['From'] = 'bperson@dom.ain'
1790 outer.preamble = ''
1791 msg = MIMEText('hello world')
1792 outer.attach(msg)
1793 outer.set_boundary('BOUNDARY')
1794 eq(outer.as_string(), '''\
1795Content-Type: multipart/mixed; boundary="BOUNDARY"
1796MIME-Version: 1.0
1797Subject: A subject
1798To: aperson@dom.ain
1799From: bperson@dom.ain
1800
1801
1802--BOUNDARY
1803Content-Type: text/plain; charset="us-ascii"
1804MIME-Version: 1.0
1805Content-Transfer-Encoding: 7bit
1806
1807hello world
1808--BOUNDARY--''')
1809
1810
# preamble = None: no extra blank line before the first boundary.
1811 def test_seq_parts_in_a_multipart_with_none_preamble(self):
1812 eq = self.ndiffAssertEqual
1813 outer = MIMEBase('multipart', 'mixed')
1814 outer['Subject'] = 'A subject'
1815 outer['To'] = 'aperson@dom.ain'
1816 outer['From'] = 'bperson@dom.ain'
1817 outer.preamble = None
1818 msg = MIMEText('hello world')
1819 outer.attach(msg)
1820 outer.set_boundary('BOUNDARY')
1821 eq(outer.as_string(), '''\
1822Content-Type: multipart/mixed; boundary="BOUNDARY"
1823MIME-Version: 1.0
1824Subject: A subject
1825To: aperson@dom.ain
1826From: bperson@dom.ain
1827
1828--BOUNDARY
1829Content-Type: text/plain; charset="us-ascii"
1830MIME-Version: 1.0
1831Content-Transfer-Encoding: 7bit
1832
1833hello world
1834--BOUNDARY--''')
1835
1836
# epilogue = None: nothing follows the closing boundary.
1837 def test_seq_parts_in_a_multipart_with_none_epilogue(self):
1838 eq = self.ndiffAssertEqual
1839 outer = MIMEBase('multipart', 'mixed')
1840 outer['Subject'] = 'A subject'
1841 outer['To'] = 'aperson@dom.ain'
1842 outer['From'] = 'bperson@dom.ain'
1843 outer.epilogue = None
1844 msg = MIMEText('hello world')
1845 outer.attach(msg)
1846 outer.set_boundary('BOUNDARY')
1847 eq(outer.as_string(), '''\
1848Content-Type: multipart/mixed; boundary="BOUNDARY"
1849MIME-Version: 1.0
1850Subject: A subject
1851To: aperson@dom.ain
1852From: bperson@dom.ain
1853
1854--BOUNDARY
1855Content-Type: text/plain; charset="us-ascii"
1856MIME-Version: 1.0
1857Content-Transfer-Encoding: 7bit
1858
1859hello world
1860--BOUNDARY--''')
1861
1862
# epilogue = '': a single newline follows the closing boundary.
1863 def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
1864 eq = self.ndiffAssertEqual
1865 outer = MIMEBase('multipart', 'mixed')
1866 outer['Subject'] = 'A subject'
1867 outer['To'] = 'aperson@dom.ain'
1868 outer['From'] = 'bperson@dom.ain'
1869 outer.epilogue = ''
1870 msg = MIMEText('hello world')
1871 outer.attach(msg)
1872 outer.set_boundary('BOUNDARY')
1873 eq(outer.as_string(), '''\
1874Content-Type: multipart/mixed; boundary="BOUNDARY"
1875MIME-Version: 1.0
1876Subject: A subject
1877To: aperson@dom.ain
1878From: bperson@dom.ain
1879
1880--BOUNDARY
1881Content-Type: text/plain; charset="us-ascii"
1882MIME-Version: 1.0
1883Content-Transfer-Encoding: 7bit
1884
1885hello world
1886--BOUNDARY--
1887''')
1888
1889
# epilogue = '\n': a newline plus one blank line follow the closing boundary.
1890 def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
1891 eq = self.ndiffAssertEqual
1892 outer = MIMEBase('multipart', 'mixed')
1893 outer['Subject'] = 'A subject'
1894 outer['To'] = 'aperson@dom.ain'
1895 outer['From'] = 'bperson@dom.ain'
1896 outer.epilogue = '\n'
1897 msg = MIMEText('hello world')
1898 outer.attach(msg)
1899 outer.set_boundary('BOUNDARY')
1900 eq(outer.as_string(), '''\
1901Content-Type: multipart/mixed; boundary="BOUNDARY"
1902MIME-Version: 1.0
1903Subject: A subject
1904To: aperson@dom.ain
1905From: bperson@dom.ain
1906
1907--BOUNDARY
1908Content-Type: text/plain; charset="us-ascii"
1909MIME-Version: 1.0
1910Content-Transfer-Encoding: 7bit
1911
1912hello world
1913--BOUNDARY--
1914
1915''')
1916
# msg_36.txt: the second top-level part is multipart/alternative with two
# message/external-body subparts, each wrapping a single text/plain message.
1917 def test_message_external_body(self):
1918 eq = self.assertEqual
1919 msg = self._msgobj('msg_36.txt')
1920 eq(len(msg.get_payload()), 2)
1921 msg1 = msg.get_payload(1)
1922 eq(msg1.get_content_type(), 'multipart/alternative')
1923 eq(len(msg1.get_payload()), 2)
1924 for subpart in msg1.get_payload():
1925 eq(subpart.get_content_type(), 'message/external-body')
1926 eq(len(subpart.get_payload()), 1)
1927 subsubpart = subpart.get_payload(0)
1928 eq(subsubpart.get_content_type(), 'text/plain')
1929
1930 def test_double_boundary(self):
1931 # msg_37.txt is a multipart that contains two dash-boundary's in a
1932 # row. Our interpretation of RFC 2046 calls for ignoring the second
1933 # and subsequent boundaries.
1934 msg = self._msgobj('msg_37.txt')
1935 self.assertEqual(len(msg.get_payload()), 3)
1936
# NOTE(review): the nesting indentation of the expected _structure dump
# below appears collapsed in this view; confirm against upstream.
1937 def test_nested_inner_contains_outer_boundary(self):
1938 eq = self.ndiffAssertEqual
1939 # msg_38.txt has an inner part that contains outer boundaries. My
1940 # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
1941 # these are illegal and should be interpreted as unterminated inner
1942 # parts.
1943 msg = self._msgobj('msg_38.txt')
1944 sfp = StringIO()
1945 iterators._structure(msg, sfp)
1946 eq(sfp.getvalue(), """\
1947multipart/mixed
1948 multipart/mixed
1949 multipart/alternative
1950 text/plain
1951 text/plain
1952 text/plain
1953 text/plain
1954""")
1955
# NOTE(review): same caveat about collapsed indentation in the expected
# _structure dump applies here.
1956 def test_nested_with_same_boundary(self):
1957 eq = self.ndiffAssertEqual
1958 # msg 39.txt is similarly evil in that it's got inner parts that use
1959 # the same boundary as outer parts. Again, I believe the way this is
1960 # parsed is closest to the spirit of RFC 2046
1961 msg = self._msgobj('msg_39.txt')
1962 sfp = StringIO()
1963 iterators._structure(msg, sfp)
1964 eq(sfp.getvalue(), """\
1965multipart/mixed
1966 multipart/mixed
1967 multipart/alternative
1968 application/octet-stream
1969 application/octet-stream
1970 text/plain
1971""")
1972
# msg_40.txt declares text/html with a boundary parameter; as_string() is
# expected to equal the literal below, boundary-looking lines included.
1973 def test_boundary_in_non_multipart(self):
1974 msg = self._msgobj('msg_40.txt')
1975 self.assertEqual(msg.as_string(), '''\
1976MIME-Version: 1.0
1977Content-Type: text/html; boundary="--961284236552522269"
1978
1979----961284236552522269
1980Content-Type: text/html;
1981Content-Transfer-Encoding: 7Bit
1982
1983<html></html>
1984
1985----961284236552522269--
1986''')
1987
# A quoted boundary that starts with a space must be returned with that
# space by get_boundary() and must still split the body into two parts.
1988 def test_boundary_with_leading_space(self):
1989 eq = self.assertEqual
1990 msg = email.message_from_string('''\
1991MIME-Version: 1.0
1992Content-Type: multipart/mixed; boundary=" XXXX"
1993
1994-- XXXX
1995Content-Type: text/plain
1996
1997
1998-- XXXX
1999Content-Type: text/plain
2000
2001-- XXXX--
2002''')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002003 self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002004 eq(msg.get_boundary(), ' XXXX')
2005 eq(len(msg.get_payload()), 2)
2006
# The input ends directly after the closing boundary (no final newline);
# the first part's payload must still be exactly 'YXNkZg=='.
2007 def test_boundary_without_trailing_newline(self):
2008 m = Parser().parsestr("""\
2009Content-Type: multipart/mixed; boundary="===============0012394164=="
2010MIME-Version: 1.0
2011
2012--===============0012394164==
2013Content-Type: image/file1.jpg
2014MIME-Version: 1.0
2015Content-Transfer-Encoding: base64
2016
2017YXNkZg==
2018--===============0012394164==--""")
Ezio Melottib3aedd42010-11-20 19:04:17 +00002019 self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002020
2021
Ezio Melottib3aedd42010-11-20 19:04:17 +00002022
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002023# Test some badly formatted messages
# Parses malformed or non-conformant messages (fixture files and inline
# text) and checks both the resulting content and the defects recorded on
# the message objects.
R David Murrayc27e5222012-05-25 15:01:48 -04002024class TestNonConformant(TestEmailBase):
R David Murray3edd22a2011-04-18 13:59:37 -04002025
# msg_14.txt: the type accessors must all report text/plain.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002026 def test_parse_missing_minor_type(self):
2027 eq = self.assertEqual
2028 msg = self._msgobj('msg_14.txt')
2029 eq(msg.get_content_type(), 'text/plain')
2030 eq(msg.get_content_maintype(), 'text')
2031 eq(msg.get_content_subtype(), 'plain')
2032
R David Murray80e0aee2012-05-27 21:23:34 -04002033 # test_defect_handling
# msg_15.txt: the first part must carry exactly one defect, a
# StartBoundaryNotFoundDefect.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002034 def test_same_boundary_inner_outer(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002035 msg = self._msgobj('msg_15.txt')
2036 # XXX We can probably eventually do better
2037 inner = msg.get_payload(0)
Serhiy Storchaka328cf3c2013-11-16 12:56:23 +02002038 self.assertTrue(hasattr(inner, 'defects'))
R David Murrayc27e5222012-05-25 15:01:48 -04002039 self.assertEqual(len(inner.defects), 1)
Serhiy Storchaka328cf3c2013-11-16 12:56:23 +02002040 self.assertIsInstance(inner.defects[0],
2041 errors.StartBoundaryNotFoundDefect)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002042
R David Murray80e0aee2012-05-27 21:23:34 -04002043 # test_defect_handling
# msg_25.txt: the payload stays a plain str and two defects are recorded,
# in this order.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002044 def test_multipart_no_boundary(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002045 msg = self._msgobj('msg_25.txt')
Serhiy Storchaka328cf3c2013-11-16 12:56:23 +02002046 self.assertIsInstance(msg.get_payload(), str)
R David Murrayc27e5222012-05-25 15:01:48 -04002047 self.assertEqual(len(msg.defects), 2)
Serhiy Storchaka328cf3c2013-11-16 12:56:23 +02002048 self.assertIsInstance(msg.defects[0],
2049 errors.NoBoundaryInMultipartDefect)
2050 self.assertIsInstance(msg.defects[1],
2051 errors.MultipartInvariantViolationDefect)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002052
# Template for the three CTE tests below: the '{}' placeholder sits at the
# end of the header block so a test can splice in an extra header line via
# str.format().
# NOTE(review): the 'boundary=' line looks like a continuation of the
# Content-Type header, but its leading whitespace is collapsed in this
# view; confirm the indentation against upstream.
R David Murray749073a2011-06-22 13:47:53 -04002053 multipart_msg = textwrap.dedent("""\
2054 Date: Wed, 14 Nov 2007 12:56:23 GMT
2055 From: foo@bar.invalid
2056 To: foo@bar.invalid
2057 Subject: Content-Transfer-Encoding: base64 and multipart
2058 MIME-Version: 1.0
2059 Content-Type: multipart/mixed;
2060 boundary="===============3344438784458119861=="{}
2061
2062 --===============3344438784458119861==
2063 Content-Type: text/plain
2064
2065 Test message
2066
2067 --===============3344438784458119861==
2068 Content-Type: application/octet-stream
2069 Content-Transfer-Encoding: base64
2070
2071 YWJj
2072
2073 --===============3344438784458119861==--
2074 """)
2075
R David Murray80e0aee2012-05-27 21:23:34 -04002076 # test_defect_handling
# A base64 Content-Transfer-Encoding on the multipart itself must yield
# exactly one InvalidMultipartContentTransferEncodingDefect.
R David Murray749073a2011-06-22 13:47:53 -04002077 def test_multipart_invalid_cte(self):
R David Murrayc27e5222012-05-25 15:01:48 -04002078 msg = self._str_msg(
2079 self.multipart_msg.format("\nContent-Transfer-Encoding: base64"))
2080 self.assertEqual(len(msg.defects), 1)
2081 self.assertIsInstance(msg.defects[0],
R David Murray749073a2011-06-22 13:47:53 -04002082 errors.InvalidMultipartContentTransferEncodingDefect)
2083
R David Murray80e0aee2012-05-27 21:23:34 -04002084 # test_defect_handling
# With no CTE header spliced in, no defects are expected.
R David Murray749073a2011-06-22 13:47:53 -04002085 def test_multipart_no_cte_no_defect(self):
R David Murrayc27e5222012-05-25 15:01:48 -04002086 msg = self._str_msg(self.multipart_msg.format(''))
2087 self.assertEqual(len(msg.defects), 0)
R David Murray749073a2011-06-22 13:47:53 -04002088
R David Murray80e0aee2012-05-27 21:23:34 -04002089 # test_defect_handling
# 7bit, 8bit and (mixed-case) binary on the multipart must not be flagged.
R David Murray749073a2011-06-22 13:47:53 -04002090 def test_multipart_valid_cte_no_defect(self):
2091 for cte in ('7bit', '8bit', 'BINary'):
R David Murrayc27e5222012-05-25 15:01:48 -04002092 msg = self._str_msg(
R David Murray749073a2011-06-22 13:47:53 -04002093 self.multipart_msg.format(
R David Murrayc27e5222012-05-25 15:01:48 -04002094 "\nContent-Transfer-Encoding: {}".format(cte)))
2095 self.assertEqual(len(msg.defects), 0)
R David Murray749073a2011-06-22 13:47:53 -04002096
R David Murray97f43c02012-06-24 05:03:27 -04002097 # test_headerregistry.TestContentTypeHeader invalid_1 and invalid_2.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002098 def test_invalid_content_type(self):
2099 eq = self.assertEqual
2100 neq = self.ndiffAssertEqual
2101 msg = Message()
2102 # RFC 2045, $5.2 says invalid yields text/plain
2103 msg['Content-Type'] = 'text'
2104 eq(msg.get_content_maintype(), 'text')
2105 eq(msg.get_content_subtype(), 'plain')
2106 eq(msg.get_content_type(), 'text/plain')
2107 # Clear the old value and try something /really/ invalid
2108 del msg['content-type']
2109 msg['Content-Type'] = 'foo'
2110 eq(msg.get_content_maintype(), 'text')
2111 eq(msg.get_content_subtype(), 'plain')
2112 eq(msg.get_content_type(), 'text/plain')
2113 # Still, make sure that the message is idempotently generated
2114 s = StringIO()
2115 g = Generator(s)
2116 g.flatten(msg)
2117 neq(s.getvalue(), 'Content-Type: foo\n\n')
2118
# msg_31.txt: get_payload() must return the whole body, boundary lines
# included, as one string.
2119 def test_no_start_boundary(self):
2120 eq = self.ndiffAssertEqual
2121 msg = self._msgobj('msg_31.txt')
2122 eq(msg.get_payload(), """\
2123--BOUNDARY
2124Content-Type: text/plain
2125
2126message 1
2127
2128--BOUNDARY
2129Content-Type: text/plain
2130
2131message 2
2132
2133--BOUNDARY--
2134""")
2135
# msg_35.txt: as_string() is expected to emit a blank line between the
# three headers and the body text.
2136 def test_no_separating_blank_line(self):
2137 eq = self.ndiffAssertEqual
2138 msg = self._msgobj('msg_35.txt')
2139 eq(msg.as_string(), """\
2140From: aperson@dom.ain
2141To: bperson@dom.ain
2142Subject: here's something interesting
2143
2144counter to RFC 2822, there's no separating newline here
2145""")
2146
R David Murray80e0aee2012-05-27 21:23:34 -04002147 # test_defect_handling
# msg_41.txt: two defects are expected, in this order.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002148 def test_lying_multipart(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002149 msg = self._msgobj('msg_41.txt')
Serhiy Storchaka328cf3c2013-11-16 12:56:23 +02002150 self.assertTrue(hasattr(msg, 'defects'))
R David Murrayc27e5222012-05-25 15:01:48 -04002151 self.assertEqual(len(msg.defects), 2)
Serhiy Storchaka328cf3c2013-11-16 12:56:23 +02002152 self.assertIsInstance(msg.defects[0],
2153 errors.NoBoundaryInMultipartDefect)
2154 self.assertIsInstance(msg.defects[1],
2155 errors.MultipartInvariantViolationDefect)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002156
R David Murray80e0aee2012-05-27 21:23:34 -04002157 # test_defect_handling
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002158 def test_missing_start_boundary(self):
2159 outer = self._msgobj('msg_42.txt')
2160 # The message structure is:
2161 #
2162 # multipart/mixed
2163 # text/plain
2164 # message/rfc822
2165 # multipart/mixed [*]
2166 #
2167 # [*] This message is missing its start boundary
2168 bad = outer.get_payload(1).get_payload(0)
R David Murrayc27e5222012-05-25 15:01:48 -04002169 self.assertEqual(len(bad.defects), 1)
Serhiy Storchaka328cf3c2013-11-16 12:56:23 +02002170 self.assertIsInstance(bad.defects[0],
2171 errors.StartBoundaryNotFoundDefect)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002172
R David Murray80e0aee2012-05-27 21:23:34 -04002173 # test_defect_handling
# Input whose first line starts with whitespace: only 'Subject' survives as
# a header, the body is 'body', and the single defect records the offending
# line in its .line attribute.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002174 def test_first_line_is_continuation_header(self):
2175 eq = self.assertEqual
R David Murrayadbdcdb2012-05-27 20:45:01 -04002176 m = ' Line 1\nSubject: test\n\nbody'
R David Murrayc27e5222012-05-25 15:01:48 -04002177 msg = email.message_from_string(m)
R David Murrayadbdcdb2012-05-27 20:45:01 -04002178 eq(msg.keys(), ['Subject'])
2179 eq(msg.get_payload(), 'body')
R David Murrayc27e5222012-05-25 15:01:48 -04002180 eq(len(msg.defects), 1)
R David Murrayadbdcdb2012-05-27 20:45:01 -04002181 self.assertDefectsEqual(msg.defects,
2182 [errors.FirstHeaderLineIsContinuationDefect])
R David Murrayc27e5222012-05-25 15:01:48 -04002183 eq(msg.defects[0].line, ' Line 1\n')
R David Murray3edd22a2011-04-18 13:59:37 -04002184
R David Murrayd41595b2012-05-28 20:14:10 -04002185 # test_defect_handling
R David Murrayadbdcdb2012-05-27 20:45:01 -04002186 def test_missing_header_body_separator(self):
2187 # Our heuristic if we see a line that doesn't look like a header (no
2188 # leading whitespace but no ':') is to assume that the blank line that
2189 # separates the header from the body is missing, and to stop parsing
2190 # headers and start parsing the body.
2191 msg = self._str_msg('Subject: test\nnot a header\nTo: abc\n\nb\n')
2192 self.assertEqual(msg.keys(), ['Subject'])
2193 self.assertEqual(msg.get_payload(), 'not a header\nTo: abc\n\nb\n')
2194 self.assertDefectsEqual(msg.defects,
2195 [errors.MissingHeaderBodySeparatorDefect])
2196
Ezio Melottib3aedd42010-11-20 19:04:17 +00002197
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002198# Test RFC 2047 header encoding and decoding
Guido van Rossum9604e662007-08-30 03:46:43 +00002199class TestRFC2047(TestEmailBase):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002200 def test_rfc2047_multiline(self):
# Decode a two-line header value mixing plain text with mac-iceland
# encoded words, check the (bytes, charset) chunks, then rebuild a Header
# from the chunks and check both its str() form and its re-encoded form.
# NOTE(review): the second line of the input literal and of the expected
# encode() output start with whitespace that may be collapsed in this view;
# confirm against upstream.
2201 eq = self.assertEqual
2202 s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
2203 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
2204 dh = decode_header(s)
# Unencoded runs come back with charset None; the line break inside the
# input shows up as a single space in ' baz foo bar '.
2205 eq(dh, [
R David Murray07ea53c2012-06-02 17:56:49 -04002206 (b'Re: ', None),
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002207 (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
R David Murray07ea53c2012-06-02 17:56:49 -04002208 (b' baz foo bar ', None),
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002209 (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
2210 header = make_header(dh)
2211 eq(str(header),
2212 'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
# With maxlinelen=76 the second encoded word is expected to be split across
# two output lines.
Barry Warsaw00b34222007-08-31 02:35:00 +00002213 self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
Guido van Rossum9604e662007-08-30 03:46:43 +00002214Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
2215 =?mac-iceland?q?=9Arg=8Cs?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002216
R David Murray07ea53c2012-06-02 17:56:49 -04002217 def test_whitespace_keeper_unicode(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002218 eq = self.assertEqual
2219 s = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
2220 dh = decode_header(s)
2221 eq(dh, [(b'Andr\xe9', 'iso-8859-1'),
R David Murray07ea53c2012-06-02 17:56:49 -04002222 (b' Pirard <pirard@dom.ain>', None)])
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002223 header = str(make_header(dh))
2224 eq(header, 'Andr\xe9 Pirard <pirard@dom.ain>')
2225
R David Murray07ea53c2012-06-02 17:56:49 -04002226 def test_whitespace_keeper_unicode_2(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002227 eq = self.assertEqual
2228 s = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?='
2229 dh = decode_header(s)
R David Murray07ea53c2012-06-02 17:56:49 -04002230 eq(dh, [(b'The ', None), (b'quick brown fox', 'iso-8859-1'),
2231 (b' jumped over the ', None), (b'lazy dog', 'iso-8859-1')])
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002232 hu = str(make_header(dh))
2233 eq(hu, 'The quick brown fox jumped over the lazy dog')
2234
2235 def test_rfc2047_missing_whitespace(self):
2236 s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
2237 dh = decode_header(s)
R David Murray07ea53c2012-06-02 17:56:49 -04002238 self.assertEqual(dh, [(b'Sm', None), (b'\xf6', 'iso-8859-1'),
2239 (b'rg', None), (b'\xe5', 'iso-8859-1'),
2240 (b'sbord', None)])
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002241
2242 def test_rfc2047_with_whitespace(self):
2243 s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
2244 dh = decode_header(s)
R David Murray07ea53c2012-06-02 17:56:49 -04002245 self.assertEqual(dh, [(b'Sm ', None), (b'\xf6', 'iso-8859-1'),
2246 (b' rg ', None), (b'\xe5', 'iso-8859-1'),
2247 (b' sbord', None)])
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002248
R. David Murrayc4e69cc2010-08-03 22:14:10 +00002249 def test_rfc2047_B_bad_padding(self):
2250 s = '=?iso-8859-1?B?%s?='
2251 data = [ # only test complete bytes
2252 ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
2253 ('dmk=', b'vi'), ('dmk', b'vi')
2254 ]
2255 for q, a in data:
2256 dh = decode_header(s % q)
2257 self.assertEqual(dh, [(a, 'iso-8859-1')])
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002258
R. David Murray31e984c2010-10-01 15:40:20 +00002259 def test_rfc2047_Q_invalid_digits(self):
2260 # issue 10004.
2261 s = '=?iso-8659-1?Q?andr=e9=zz?='
2262 self.assertEqual(decode_header(s),
2263 [(b'andr\xe9=zz', 'iso-8659-1')])
2264
R David Murray07ea53c2012-06-02 17:56:49 -04002265 def test_rfc2047_rfc2047_1(self):
2266 # 1st testcase at end of rfc2047
2267 s = '(=?ISO-8859-1?Q?a?=)'
2268 self.assertEqual(decode_header(s),
2269 [(b'(', None), (b'a', 'iso-8859-1'), (b')', None)])
2270
2271 def test_rfc2047_rfc2047_2(self):
2272 # 2nd testcase at end of rfc2047
2273 s = '(=?ISO-8859-1?Q?a?= b)'
2274 self.assertEqual(decode_header(s),
2275 [(b'(', None), (b'a', 'iso-8859-1'), (b' b)', None)])
2276
2277 def test_rfc2047_rfc2047_3(self):
2278 # 3rd testcase at end of rfc2047
2279 s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)'
2280 self.assertEqual(decode_header(s),
2281 [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])
2282
2283 def test_rfc2047_rfc2047_4(self):
2284 # 4th testcase at end of rfc2047
2285 s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)'
2286 self.assertEqual(decode_header(s),
2287 [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])
2288
2289 def test_rfc2047_rfc2047_5a(self):
2290 # 5th testcase at end of rfc2047 newline is \r\n
2291 s = '(=?ISO-8859-1?Q?a?=\r\n =?ISO-8859-1?Q?b?=)'
2292 self.assertEqual(decode_header(s),
2293 [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])
2294
2295 def test_rfc2047_rfc2047_5b(self):
2296 # 5th testcase at end of rfc2047 newline is \n
2297 s = '(=?ISO-8859-1?Q?a?=\n =?ISO-8859-1?Q?b?=)'
2298 self.assertEqual(decode_header(s),
2299 [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])
2300
2301 def test_rfc2047_rfc2047_6(self):
2302 # 6th testcase at end of rfc2047
2303 s = '(=?ISO-8859-1?Q?a_b?=)'
2304 self.assertEqual(decode_header(s),
2305 [(b'(', None), (b'a b', 'iso-8859-1'), (b')', None)])
2306
2307 def test_rfc2047_rfc2047_7(self):
2308 # 7th testcase at end of rfc2047
2309 s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)'
2310 self.assertEqual(decode_header(s),
2311 [(b'(', None), (b'a', 'iso-8859-1'), (b' b', 'iso-8859-2'),
2312 (b')', None)])
2313 self.assertEqual(make_header(decode_header(s)).encode(), s.lower())
2314 self.assertEqual(str(make_header(decode_header(s))), '(a b)')
2315
R David Murray82ffabd2012-06-03 12:27:07 -04002316 def test_multiline_header(self):
2317 s = '=?windows-1252?q?=22M=FCller_T=22?=\r\n <T.Mueller@xxx.com>'
2318 self.assertEqual(decode_header(s),
2319 [(b'"M\xfcller T"', 'windows-1252'),
2320 (b'<T.Mueller@xxx.com>', None)])
2321 self.assertEqual(make_header(decode_header(s)).encode(),
2322 ''.join(s.splitlines()))
2323 self.assertEqual(str(make_header(decode_header(s))),
2324 '"Müller T" <T.Mueller@xxx.com>')
2325
Ezio Melottib3aedd42010-11-20 19:04:17 +00002326
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002327# Test the MIMEMessage class
class TestMIMEMessage(TestEmailBase):
    """Tests for MIMEMessage, message/* parts and multipart containers."""

    def setUp(self):
        # Keep the raw text of the message/rfc822 fixture on the instance.
        with openfile('msg_11.txt') as fixture:
            self._text = fixture.read()

    def _build_two_part_message(self, epilogue):
        """Return a multipart/mixed Message with text parts 'One' and 'Two'.

        The preamble is always 'MIME message'; the epilogue is the value
        passed in.
        """
        outer = Message()
        outer['From'] = 'aperson@dom.ain'
        outer['To'] = 'bperson@dom.ain'
        outer['Subject'] = 'Test'
        outer.preamble = 'MIME message'
        outer.epilogue = epilogue
        parts = [MIMEText('One'), MIMEText('Two')]
        outer.add_header('Content-Type', 'multipart/mixed',
                         boundary='BOUNDARY')
        for part in parts:
            outer.attach(part)
        return outer

    def _assert_digest_default_types(self, filename):
        """Check the types of the first two parts of a parsed fixture.

        Both parts must report message/rfc822 as default and content type,
        and the message enclosed in each must report text/plain for both.
        """
        with openfile(filename) as fixture:
            digest = email.message_from_file(fixture)
        wrappers = []
        for index in (0, 1):
            wrapper = digest.get_payload(index)
            self.assertEqual(wrapper.get_default_type(), 'message/rfc822')
            self.assertEqual(wrapper.get_content_type(), 'message/rfc822')
            wrappers.append(wrapper)
        for wrapper in wrappers:
            enclosed = wrapper.get_payload(0)
            self.assertEqual(enclosed.get_default_type(), 'text/plain')
            self.assertEqual(enclosed.get_content_type(), 'text/plain')

    def test_type_error(self):
        # MIMEMessage only wraps Message objects, not plain strings.
        with self.assertRaises(TypeError):
            MIMEMessage('a plain string')

    def test_valid_argument(self):
        subject = 'A sub-message'
        inner = Message()
        inner['Subject'] = subject
        wrapper = MIMEMessage(inner)
        self.assertEqual(wrapper.get_content_type(), 'message/rfc822')
        payload = wrapper.get_payload()
        self.assertIsInstance(payload, list)
        self.assertEqual(len(payload), 1)
        enclosed = payload[0]
        # The wrapped object itself is stored, not a copy.
        self.assertIs(enclosed, inner)
        self.assertEqual(enclosed['subject'], subject)

    def test_bad_multipart(self):
        first = Message()
        first['Subject'] = 'subpart 1'
        second = Message()
        second['Subject'] = 'subpart 2'
        wrapper = MIMEMessage(first)
        # Attaching a second message to a MIMEMessage must be rejected.
        with self.assertRaises(errors.MultipartConversionError):
            wrapper.attach(second)

    def test_generate(self):
        # Build the enclosed message first, then wrap and flatten it.
        enclosed = Message()
        enclosed['Subject'] = 'An enclosed message'
        enclosed.set_payload('Here is the body of the message.\n')
        wrapper = MIMEMessage(enclosed)
        wrapper['Subject'] = 'The enclosing message'
        out = StringIO()
        Generator(out).flatten(wrapper)
        self.assertEqual(out.getvalue(), """\
Content-Type: message/rfc822
MIME-Version: 1.0
Subject: The enclosing message

Subject: An enclosed message

Here is the body of the message.
""")

    def test_parse_message_rfc822(self):
        parsed = self._msgobj('msg_11.txt')
        self.assertEqual(parsed.get_content_type(), 'message/rfc822')
        payload = parsed.get_payload()
        self.assertIsInstance(payload, list)
        self.assertEqual(len(payload), 1)
        enclosed = payload[0]
        self.assertIsInstance(enclosed, Message)
        self.assertEqual(enclosed['subject'], 'An enclosed message')
        self.assertEqual(enclosed.get_payload(),
                         'Here is the body of the message.\n')

    def test_dsn(self):
        check = self.assertEqual
        # msg 16 is a Delivery Status Notification, see RFC 1894
        report = self._msgobj('msg_16.txt')
        check(report.get_content_type(), 'multipart/report')
        self.assertTrue(report.is_multipart())
        check(len(report.get_payload()), 3)
        # Part 1: the human readable text/plain section.
        # NOTE(review): the indented lines in the expected text must match
        # msg_16.txt exactly; two leading spaces are assumed here -- confirm
        # against the fixture.
        human = report.get_payload(0)
        check(human.get_content_type(), 'text/plain')
        check(human.get_payload(), """\
This report relates to a message you sent with the following header fields:

  Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
  Date: Sun, 23 Sep 2001 20:10:55 -0700
  From: "Ian T. Henry" <henryi@oxy.edu>
  To: SoCal Raves <scr@socal-raves.org>
  Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

  Recipient address: jangel1@cougar.noc.ucla.edu
  Reason: recipient reached disk quota

""")
        # Part 2: the machine parsable DSN information, made of two blocks
        # of headers that are represented by two nested Message objects.
        status = report.get_payload(1)
        check(status.get_content_type(), 'message/delivery-status')
        check(len(status.get_payload()), 2)
        # Each message/delivery-status block is a bunch of headers, i.e. a
        # Message object.
        per_message = status.get_payload(0)
        self.assertIsInstance(per_message, Message)
        check(per_message['original-envelope-id'],
              '0GK500B4HD0888@cougar.noc.ucla.edu')
        check(per_message.get_param('dns', header='reporting-mta'), '')
        # Try a missing one <wink>
        check(per_message.get_param('nsd', header='reporting-mta'), None)
        per_recipient = status.get_payload(1)
        self.assertIsInstance(per_recipient, Message)
        check(per_recipient['action'], 'failed')
        check(per_recipient.get_params(header='original-recipient'),
              [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
        check(per_recipient.get_param('rfc822', header='final-recipient'),
              '')
        # Part 3: the original message.
        original = report.get_payload(2)
        check(original.get_content_type(), 'message/rfc822')
        payload = original.get_payload()
        self.assertIsInstance(payload, list)
        check(len(payload), 1)
        enclosed = payload[0]
        self.assertIsInstance(enclosed, Message)
        check(enclosed.get_content_type(), 'text/plain')
        check(enclosed['message-id'],
              '<002001c144a6$8752e060$56104586@oxy.edu>')

    def test_epilogue(self):
        # A message built by hand must flatten to the text of msg_21.txt.
        with openfile('msg_21.txt') as fixture:
            expected = fixture.read()
        outer = self._build_two_part_message('End of MIME message\n')
        out = StringIO()
        Generator(out).flatten(outer)
        self.ndiffAssertEqual(out.getvalue(), expected)

    def test_no_nl_preamble(self):
        # The preamble has no trailing newline and the epilogue is empty.
        outer = self._build_two_part_message('')
        self.ndiffAssertEqual(outer.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
""")

    def test_default_type(self):
        self._assert_digest_default_types('msg_30.txt')

    def test_default_type_with_explicit_container_type(self):
        self._assert_digest_default_types('msg_28.txt')

    def test_default_type_non_parsed(self):
        # Build the digest container by hand instead of parsing one.
        digest = MIMEMultipart('digest', 'BOUNDARY')
        digest.epilogue = ''
        text_one = MIMEText('message 1\n')
        text_two = MIMEText('message 2\n')
        wrapped_one = MIMEMessage(text_one)
        wrapped_two = MIMEMessage(text_two)
        digest.attach(wrapped_one)
        digest.attach(wrapped_two)
        wrapped = (wrapped_one, wrapped_two)

        def check_wrapper_types():
            for part in wrapped:
                self.assertEqual(part.get_content_type(), 'message/rfc822')
                self.assertEqual(part.get_default_type(), 'message/rfc822')

        check_wrapper_types()
        self.ndiffAssertEqual(digest.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
        # Without explicit headers the wrappers must still report
        # message/rfc822, and the flattened text loses those header lines.
        for part in wrapped:
            del part['content-type']
            del part['mime-version']
        check_wrapper_types()
        self.ndiffAssertEqual(digest.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')

    def test_mime_attachments_in_constructor(self):
        parts = (MIMEText(''), MIMEText(''))
        container = MIMEMultipart(_subparts=parts)
        self.assertEqual(len(container.get_payload()), 2)
        for index, part in enumerate(parts):
            self.assertEqual(container.get_payload(index), part)

    def test_default_multipart_constructor(self):
        # A MIMEMultipart built with no arguments is already multipart.
        self.assertTrue(MIMEMultipart().is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002622
Ezio Melottib3aedd42010-11-20 19:04:17 +00002623
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002624# A general test of parser->model->generator idempotency. IOW, read a message
2625# in, parse it into a message object tree, then without touching the tree,
2626# regenerate the plain text. The original text and the transformed text
2627# should be identical. Note: that we ignore the Unix-From since that may
2628# contain a changed date.
class TestIdempotent(TestEmailBase):
    """Parse fixture messages and check that flattening reproduces them.

    linesep, _msgobj() and _idempotent() are looked up through self so that
    they can be overridden.
    """

    linesep = '\n'

    def _msgobj(self, filename):
        """Return (parsed message, raw text) for the named fixture file."""
        with openfile(filename) as fixture:
            raw = fixture.read()
        return email.message_from_string(raw), raw

    def _idempotent(self, msg, text, unixfrom=False):
        """Flatten msg with header wrapping disabled and compare to text."""
        out = StringIO()
        Generator(out, maxheaderlen=0).flatten(msg, unixfrom=unixfrom)
        self.ndiffAssertEqual(text, out.getvalue())

    def _roundtrip(self, filename, **flatten_options):
        """Parse the named fixture and assert that it regenerates as-is."""
        parsed, raw = self._msgobj(filename)
        self._idempotent(parsed, raw, **flatten_options)

    def test_parse_text_message(self):
        parsed, raw = self._msgobj('msg_01.txt')
        self.assertEqual(parsed.get_content_type(), 'text/plain')
        self.assertEqual(parsed.get_content_maintype(), 'text')
        self.assertEqual(parsed.get_content_subtype(), 'plain')
        self.assertEqual(parsed.get_params()[1], ('charset', 'us-ascii'))
        self.assertEqual(parsed.get_param('charset'), 'us-ascii')
        self.assertEqual(parsed.preamble, None)
        self.assertEqual(parsed.epilogue, None)
        self._idempotent(parsed, raw)

    def test_parse_untyped_message(self):
        parsed, raw = self._msgobj('msg_03.txt')
        self.assertEqual(parsed.get_content_type(), 'text/plain')
        self.assertEqual(parsed.get_params(), None)
        self.assertEqual(parsed.get_param('charset'), None)
        self._idempotent(parsed, raw)

    def test_simple_multipart(self):
        self._roundtrip('msg_04.txt')

    def test_MIME_digest(self):
        self._roundtrip('msg_02.txt')

    def test_long_header(self):
        self._roundtrip('msg_27.txt')

    def test_MIME_digest_with_part_headers(self):
        self._roundtrip('msg_28.txt')

    def test_mixed_with_image(self):
        self._roundtrip('msg_06.txt')

    def test_multipart_report(self):
        self._roundtrip('msg_05.txt')

    def test_dsn(self):
        self._roundtrip('msg_16.txt')

    def test_preamble_epilogue(self):
        self._roundtrip('msg_21.txt')

    def test_multipart_one_part(self):
        self._roundtrip('msg_23.txt')

    def test_multipart_no_parts(self):
        self._roundtrip('msg_24.txt')

    def test_no_start_boundary(self):
        self._roundtrip('msg_31.txt')

    def test_rfc2231_charset(self):
        self._roundtrip('msg_32.txt')

    def test_more_rfc2231_parameters(self):
        self._roundtrip('msg_33.txt')

    def test_text_plain_in_a_multipart_digest(self):
        self._roundtrip('msg_34.txt')

    def test_nested_multipart_mixeds(self):
        self._roundtrip('msg_12a.txt')

    def test_message_external_body_idempotent(self):
        self._roundtrip('msg_36.txt')

    def test_message_delivery_status(self):
        # This fixture is flattened with unixfrom=True.
        self._roundtrip('msg_43.txt', unixfrom=True)

    def test_message_signed_idempotent(self):
        self._roundtrip('msg_45.txt')

    def test_content_type(self):
        check = self.assertEqual
        report, _ = self._msgobj('msg_05.txt')
        check(report.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(report.get_params())
        check(params['report-type'], 'delivery-status')
        check(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        check(report.preamble,
              'This is a MIME-encapsulated message.' + self.linesep)
        check(report.epilogue, self.linesep)
        check(len(report.get_payload()), 3)
        # Make sure the subparts are what we expect
        body = 'Yadda yadda yadda' + self.linesep
        first = report.get_payload(0)
        check(first.get_content_type(), 'text/plain')
        check(first.get_payload(), body)
        second = report.get_payload(1)
        check(second.get_content_type(), 'text/plain')
        check(second.get_payload(), body)
        third = report.get_payload(2)
        check(third.get_content_type(), 'message/rfc822')
        self.assertIsInstance(third, Message)
        payload = third.get_payload()
        self.assertIsInstance(payload, list)
        check(len(payload), 1)
        enclosed = payload[0]
        self.assertIsInstance(enclosed, Message)
        check(enclosed.get_payload(), body)

    def test_parser(self):
        outer, _ = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        self.assertEqual(outer.get_content_type(), 'message/rfc822')
        # The payload must be a list holding exactly one sub-Message, and
        # that sub-message must be text/plain with a string payload.
        payload = outer.get_payload()
        self.assertIsInstance(payload, list)
        self.assertEqual(len(payload), 1)
        enclosed = payload[0]
        self.assertIsInstance(enclosed, Message)
        self.assertEqual(enclosed.get_content_type(), 'text/plain')
        self.assertIsInstance(enclosed.get_payload(), str)
        self.assertEqual(enclosed.get_payload(), self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002784
2785
Ezio Melottib3aedd42010-11-20 19:04:17 +00002786
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002787# Test various other bits of the package's functionality
2788class TestMiscellaneous(TestEmailBase):
2789 def test_message_from_string(self):
2790 with openfile('msg_01.txt') as fp:
2791 text = fp.read()
2792 msg = email.message_from_string(text)
2793 s = StringIO()
2794 # Don't wrap/continue long headers since we're trying to test
2795 # idempotency.
2796 g = Generator(s, maxheaderlen=0)
2797 g.flatten(msg)
2798 self.assertEqual(text, s.getvalue())
2799
2800 def test_message_from_file(self):
2801 with openfile('msg_01.txt') as fp:
2802 text = fp.read()
2803 fp.seek(0)
2804 msg = email.message_from_file(fp)
2805 s = StringIO()
2806 # Don't wrap/continue long headers since we're trying to test
2807 # idempotency.
2808 g = Generator(s, maxheaderlen=0)
2809 g.flatten(msg)
2810 self.assertEqual(text, s.getvalue())
2811
2812 def test_message_from_string_with_class(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002813 with openfile('msg_01.txt') as fp:
2814 text = fp.read()
2815
2816 # Create a subclass
2817 class MyMessage(Message):
2818 pass
2819
2820 msg = email.message_from_string(text, MyMessage)
Serhiy Storchaka328cf3c2013-11-16 12:56:23 +02002821 self.assertIsInstance(msg, MyMessage)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002822 # Try something more complicated
2823 with openfile('msg_02.txt') as fp:
2824 text = fp.read()
2825 msg = email.message_from_string(text, MyMessage)
2826 for subpart in msg.walk():
Serhiy Storchaka328cf3c2013-11-16 12:56:23 +02002827 self.assertIsInstance(subpart, MyMessage)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002828
2829 def test_message_from_file_with_class(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002830 # Create a subclass
2831 class MyMessage(Message):
2832 pass
2833
2834 with openfile('msg_01.txt') as fp:
2835 msg = email.message_from_file(fp, MyMessage)
Serhiy Storchaka328cf3c2013-11-16 12:56:23 +02002836 self.assertIsInstance(msg, MyMessage)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002837 # Try something more complicated
2838 with openfile('msg_02.txt') as fp:
2839 msg = email.message_from_file(fp, MyMessage)
2840 for subpart in msg.walk():
Serhiy Storchaka328cf3c2013-11-16 12:56:23 +02002841 self.assertIsInstance(subpart, MyMessage)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002842
R David Murrayc27e5222012-05-25 15:01:48 -04002843 def test_custom_message_does_not_require_arguments(self):
2844 class MyMessage(Message):
2845 def __init__(self):
2846 super().__init__()
2847 msg = self._str_msg("Subject: test\n\ntest", MyMessage)
Serhiy Storchaka328cf3c2013-11-16 12:56:23 +02002848 self.assertIsInstance(msg, MyMessage)
R David Murrayc27e5222012-05-25 15:01:48 -04002849
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002850 def test__all__(self):
2851 module = __import__('email')
R David Murray1b6c7242012-03-16 22:43:05 -04002852 self.assertEqual(sorted(module.__all__), [
2853 'base64mime', 'charset', 'encoders', 'errors', 'feedparser',
2854 'generator', 'header', 'iterators', 'message',
2855 'message_from_binary_file', 'message_from_bytes',
2856 'message_from_file', 'message_from_string', 'mime', 'parser',
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002857 'quoprimime', 'utils',
2858 ])
2859
2860 def test_formatdate(self):
2861 now = time.time()
2862 self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
2863 time.gmtime(now)[:6])
2864
2865 def test_formatdate_localtime(self):
2866 now = time.time()
2867 self.assertEqual(
2868 utils.parsedate(utils.formatdate(now, localtime=True))[:6],
2869 time.localtime(now)[:6])
2870
2871 def test_formatdate_usegmt(self):
2872 now = time.time()
2873 self.assertEqual(
2874 utils.formatdate(now, localtime=False),
2875 time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
2876 self.assertEqual(
2877 utils.formatdate(now, localtime=False, usegmt=True),
2878 time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
2879
Georg Brandl1aca31e2012-09-22 09:03:56 +02002880 # parsedate and parsedate_tz will become deprecated interfaces someday
2881 def test_parsedate_returns_None_for_invalid_strings(self):
2882 self.assertIsNone(utils.parsedate(''))
2883 self.assertIsNone(utils.parsedate_tz(''))
2884 self.assertIsNone(utils.parsedate('0'))
2885 self.assertIsNone(utils.parsedate_tz('0'))
2886 self.assertIsNone(utils.parsedate('A Complete Waste of Time'))
2887 self.assertIsNone(utils.parsedate_tz('A Complete Waste of Time'))
2888 # Not a part of the spec but, but this has historically worked:
2889 self.assertIsNone(utils.parsedate(None))
2890 self.assertIsNone(utils.parsedate_tz(None))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002891
2892 def test_parsedate_compact(self):
2893 # The FWS after the comma is optional
2894 self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
2895 utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
2896
2897 def test_parsedate_no_dayofweek(self):
2898 eq = self.assertEqual
2899 eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
2900 (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
2901
2902 def test_parsedate_compact_no_dayofweek(self):
2903 eq = self.assertEqual
2904 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
2905 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2906
R. David Murray4a62e892010-12-23 20:35:46 +00002907 def test_parsedate_no_space_before_positive_offset(self):
2908 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
2909 (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
2910
2911 def test_parsedate_no_space_before_negative_offset(self):
2912 # Issue 1155362: we already handled '+' for this case.
2913 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
2914 (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))
2915
2916
R David Murrayaccd1c02011-03-13 20:06:23 -04002917 def test_parsedate_accepts_time_with_dots(self):
2918 eq = self.assertEqual
2919 eq(utils.parsedate_tz('5 Feb 2003 13.47.26 -0800'),
2920 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2921 eq(utils.parsedate_tz('5 Feb 2003 13.47 -0800'),
2922 (2003, 2, 5, 13, 47, 0, 0, 1, -1, -28800))
2923
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002924 def test_parsedate_acceptable_to_time_functions(self):
2925 eq = self.assertEqual
2926 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
2927 t = int(time.mktime(timetup))
2928 eq(time.localtime(t)[:6], timetup[:6])
2929 eq(int(time.strftime('%Y', timetup)), 2003)
2930 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
2931 t = int(time.mktime(timetup[:9]))
2932 eq(time.localtime(t)[:6], timetup[:6])
2933 eq(int(time.strftime('%Y', timetup[:9])), 2003)
2934
Alexander Belopolskya07548e2012-06-21 20:34:09 -04002935 def test_mktime_tz(self):
2936 self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
2937 -1, -1, -1, 0)), 0)
2938 self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
2939 -1, -1, -1, 1234)), -1234)
2940
R. David Murray219d1c82010-08-25 00:45:55 +00002941 def test_parsedate_y2k(self):
2942 """Test for parsing a date with a two-digit year.
2943
2944 Parsing a date with a two-digit year should return the correct
2945 four-digit year. RFC822 allows two-digit years, but RFC2822 (which
2946 obsoletes RFC822) requires four-digit years.
2947
2948 """
2949 self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
2950 utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
2951 self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
2952 utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
2953
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002954 def test_parseaddr_empty(self):
2955 self.assertEqual(utils.parseaddr('<>'), ('', ''))
2956 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
2957
2958 def test_noquote_dump(self):
2959 self.assertEqual(
2960 utils.formataddr(('A Silly Person', 'person@dom.ain')),
2961 'A Silly Person <person@dom.ain>')
2962
2963 def test_escape_dump(self):
2964 self.assertEqual(
2965 utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
R David Murrayb53319f2012-03-14 15:31:47 -04002966 r'"A (Very) Silly Person" <person@dom.ain>')
2967 self.assertEqual(
2968 utils.parseaddr(r'"A \(Very\) Silly Person" <person@dom.ain>'),
2969 ('A (Very) Silly Person', 'person@dom.ain'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002970 a = r'A \(Special\) Person'
2971 b = 'person@dom.ain'
2972 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2973
2974 def test_escape_backslashes(self):
2975 self.assertEqual(
2976 utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
2977 r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
2978 a = r'Arthur \Backslash\ Foobar'
2979 b = 'person@dom.ain'
2980 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2981
R David Murray8debacb2011-04-06 09:35:57 -04002982 def test_quotes_unicode_names(self):
2983 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2984 name = "H\u00e4ns W\u00fcrst"
2985 addr = 'person@dom.ain'
2986 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
2987 latin1_quopri = "=?iso-8859-1?q?H=E4ns_W=FCrst?= <person@dom.ain>"
2988 self.assertEqual(utils.formataddr((name, addr)), utf8_base64)
2989 self.assertEqual(utils.formataddr((name, addr), 'iso-8859-1'),
2990 latin1_quopri)
2991
2992 def test_accepts_any_charset_like_object(self):
2993 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2994 name = "H\u00e4ns W\u00fcrst"
2995 addr = 'person@dom.ain'
2996 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
2997 foobar = "FOOBAR"
2998 class CharsetMock:
2999 def header_encode(self, string):
3000 return foobar
3001 mock = CharsetMock()
3002 mock_expected = "%s <%s>" % (foobar, addr)
3003 self.assertEqual(utils.formataddr((name, addr), mock), mock_expected)
3004 self.assertEqual(utils.formataddr((name, addr), Charset('utf-8')),
3005 utf8_base64)
3006
3007 def test_invalid_charset_like_object_raises_error(self):
3008 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
3009 name = "H\u00e4ns W\u00fcrst"
3010 addr = 'person@dom.ain'
3011 # A object without a header_encode method:
3012 bad_charset = object()
3013 self.assertRaises(AttributeError, utils.formataddr, (name, addr),
3014 bad_charset)
3015
3016 def test_unicode_address_raises_error(self):
3017 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
3018 addr = 'pers\u00f6n@dom.in'
3019 self.assertRaises(UnicodeError, utils.formataddr, (None, addr))
3020 self.assertRaises(UnicodeError, utils.formataddr, ("Name", addr))
3021
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003022 def test_name_with_dot(self):
3023 x = 'John X. Doe <jxd@example.com>'
3024 y = '"John X. Doe" <jxd@example.com>'
3025 a, b = ('John X. Doe', 'jxd@example.com')
3026 self.assertEqual(utils.parseaddr(x), (a, b))
3027 self.assertEqual(utils.parseaddr(y), (a, b))
3028 # formataddr() quotes the name if there's a dot in it
3029 self.assertEqual(utils.formataddr((a, b)), y)
3030
R. David Murray5397e862010-10-02 15:58:26 +00003031 def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
3032 # issue 10005. Note that in the third test the second pair of
3033 # backslashes is not actually a quoted pair because it is not inside a
3034 # comment or quoted string: the address being parsed has a quoted
3035 # string containing a quoted backslash, followed by 'example' and two
3036 # backslashes, followed by another quoted string containing a space and
3037 # the word 'example'. parseaddr copies those two backslashes
3038 # literally. Per rfc5322 this is not technically correct since a \ may
3039 # not appear in an address outside of a quoted string. It is probably
3040 # a sensible Postel interpretation, though.
3041 eq = self.assertEqual
3042 eq(utils.parseaddr('""example" example"@example.com'),
3043 ('', '""example" example"@example.com'))
3044 eq(utils.parseaddr('"\\"example\\" example"@example.com'),
3045 ('', '"\\"example\\" example"@example.com'))
3046 eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
3047 ('', '"\\\\"example\\\\" example"@example.com'))
3048
R. David Murray63563cd2010-12-18 18:25:38 +00003049 def test_parseaddr_preserves_spaces_in_local_part(self):
3050 # issue 9286. A normal RFC5322 local part should not contain any
3051 # folding white space, but legacy local parts can (they are a sequence
3052 # of atoms, not dotatoms). On the other hand we strip whitespace from
3053 # before the @ and around dots, on the assumption that the whitespace
3054 # around the punctuation is a mistake in what would otherwise be
3055 # an RFC5322 local part. Leading whitespace is, usual, stripped as well.
3056 self.assertEqual(('', "merwok wok@xample.com"),
3057 utils.parseaddr("merwok wok@xample.com"))
3058 self.assertEqual(('', "merwok wok@xample.com"),
3059 utils.parseaddr("merwok wok@xample.com"))
3060 self.assertEqual(('', "merwok wok@xample.com"),
3061 utils.parseaddr(" merwok wok @xample.com"))
3062 self.assertEqual(('', 'merwok"wok" wok@xample.com'),
3063 utils.parseaddr('merwok"wok" wok@xample.com'))
3064 self.assertEqual(('', 'merwok.wok.wok@xample.com'),
3065 utils.parseaddr('merwok. wok . wok@xample.com'))
3066
R David Murrayb53319f2012-03-14 15:31:47 -04003067 def test_formataddr_does_not_quote_parens_in_quoted_string(self):
3068 addr = ("'foo@example.com' (foo@example.com)",
3069 'foo@example.com')
3070 addrstr = ('"\'foo@example.com\' '
3071 '(foo@example.com)" <foo@example.com>')
3072 self.assertEqual(utils.parseaddr(addrstr), addr)
3073 self.assertEqual(utils.formataddr(addr), addrstr)
3074
3075
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003076 def test_multiline_from_comment(self):
3077 x = """\
3078Foo
3079\tBar <foo@example.com>"""
3080 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
3081
3082 def test_quote_dump(self):
3083 self.assertEqual(
3084 utils.formataddr(('A Silly; Person', 'person@dom.ain')),
3085 r'"A Silly; Person" <person@dom.ain>')
3086
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003087 def test_charset_richcomparisons(self):
3088 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003089 ne = self.assertNotEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003090 cset1 = Charset()
3091 cset2 = Charset()
3092 eq(cset1, 'us-ascii')
3093 eq(cset1, 'US-ASCII')
3094 eq(cset1, 'Us-AsCiI')
3095 eq('us-ascii', cset1)
3096 eq('US-ASCII', cset1)
3097 eq('Us-AsCiI', cset1)
3098 ne(cset1, 'usascii')
3099 ne(cset1, 'USASCII')
3100 ne(cset1, 'UsAsCiI')
3101 ne('usascii', cset1)
3102 ne('USASCII', cset1)
3103 ne('UsAsCiI', cset1)
3104 eq(cset1, cset2)
3105 eq(cset2, cset1)
3106
3107 def test_getaddresses(self):
3108 eq = self.assertEqual
3109 eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
3110 'Bud Person <bperson@dom.ain>']),
3111 [('Al Person', 'aperson@dom.ain'),
3112 ('Bud Person', 'bperson@dom.ain')])
3113
3114 def test_getaddresses_nasty(self):
3115 eq = self.assertEqual
3116 eq(utils.getaddresses(['foo: ;']), [('', '')])
3117 eq(utils.getaddresses(
3118 ['[]*-- =~$']),
3119 [('', ''), ('', ''), ('', '*--')])
3120 eq(utils.getaddresses(
3121 ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
3122 [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
3123
3124 def test_getaddresses_embedded_comment(self):
3125 """Test proper handling of a nested comment"""
3126 eq = self.assertEqual
3127 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
3128 eq(addrs[0][1], 'foo@bar.com')
3129
3130 def test_utils_quote_unquote(self):
3131 eq = self.assertEqual
3132 msg = Message()
3133 msg.add_header('content-disposition', 'attachment',
3134 filename='foo\\wacky"name')
3135 eq(msg.get_filename(), 'foo\\wacky"name')
3136
3137 def test_get_body_encoding_with_bogus_charset(self):
3138 charset = Charset('not a charset')
3139 self.assertEqual(charset.get_body_encoding(), 'base64')
3140
3141 def test_get_body_encoding_with_uppercase_charset(self):
3142 eq = self.assertEqual
3143 msg = Message()
3144 msg['Content-Type'] = 'text/plain; charset=UTF-8'
3145 eq(msg['content-type'], 'text/plain; charset=UTF-8')
3146 charsets = msg.get_charsets()
3147 eq(len(charsets), 1)
3148 eq(charsets[0], 'utf-8')
3149 charset = Charset(charsets[0])
3150 eq(charset.get_body_encoding(), 'base64')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00003151 msg.set_payload(b'hello world', charset=charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003152 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
3153 eq(msg.get_payload(decode=True), b'hello world')
3154 eq(msg['content-transfer-encoding'], 'base64')
3155 # Try another one
3156 msg = Message()
3157 msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
3158 charsets = msg.get_charsets()
3159 eq(len(charsets), 1)
3160 eq(charsets[0], 'us-ascii')
3161 charset = Charset(charsets[0])
3162 eq(charset.get_body_encoding(), encoders.encode_7or8bit)
3163 msg.set_payload('hello world', charset=charset)
3164 eq(msg.get_payload(), 'hello world')
3165 eq(msg['content-transfer-encoding'], '7bit')
3166
3167 def test_charsets_case_insensitive(self):
3168 lc = Charset('us-ascii')
3169 uc = Charset('US-ASCII')
3170 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
3171
def test_partial_falls_inside_message_delivery_status(self):
    """A chunk boundary inside message/delivery-status headers is handled."""
    # The Parser interface provides chunks of data to FeedParser in 8192
    # byte gulps.  SF bug #1076485 found one of those chunks inside
    # message/delivery-status header block, which triggered an
    # unreadline() of NeedMoreData.
    msg = self._msgobj('msg_43.txt')
    outline = StringIO()
    iterators._structure(msg, outline)
    # NOTE(review): the nesting below (4 spaces per level, 26 text/plain
    # parts under message/delivery-status) is spelled out explicitly;
    # confirm it against the structure of msg_43.txt.
    expected = ''.join(
        ['multipart/report\n',
         '    text/plain\n',
         '    message/delivery-status\n']
        + ['        text/plain\n'] * 26
        + ['    text/rfc822-headers\n'])
    self.ndiffAssertEqual(outline.getvalue(), expected)
3213
R. David Murraya0b44b52010-12-02 21:47:19 +00003214 def test_make_msgid_domain(self):
3215 self.assertEqual(
3216 email.utils.make_msgid(domain='testdomain-string')[-19:],
3217 '@testdomain-string>')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003218
def test_Generator_linend(self):
    """Generator rewrites CRLF input with its default '\\n' separator."""
    # Issue 14645.
    with openfile('msg_26.txt', newline='\n') as f:
        crlf_text = f.read()
    expected = crlf_text.replace('\r\n', '\n')
    buffer = StringIO()
    email.generator.Generator(buffer).flatten(
        email.message_from_string(crlf_text))
    self.assertEqual(buffer.getvalue(), expected)
3229
def test_BytesGenerator_linend(self):
    """BytesGenerator with linesep='\\r\\n' restores the CRLF original."""
    # Issue 14645.
    with openfile('msg_26.txt', newline='\n') as f:
        crlf_text = f.read()
    # Parse an LF-only copy so the CRLFs must come from the generator.
    lf_text = crlf_text.replace('\r\n', '\n')
    buffer = BytesIO()
    generator = email.generator.BytesGenerator(buffer)
    generator.flatten(email.message_from_string(lf_text), linesep='\r\n')
    self.assertEqual(buffer.getvalue().decode('ascii'), crlf_text)
3240
def test_BytesGenerator_linend_with_non_ascii(self):
    """linesep='\\r\\n' also works when the body holds a non-ASCII byte."""
    # Issue 14645.
    with openfile('msg_26.txt', 'rb') as f:
        original = f.read()
    # Inject a raw 8-bit byte into the body text.
    crlf_bytes = original.replace(b'with attachment', b'fo\xf6')
    lf_bytes = crlf_bytes.replace(b'\r\n', b'\n')
    buffer = BytesIO()
    generator = email.generator.BytesGenerator(buffer)
    generator.flatten(email.message_from_bytes(lf_bytes), linesep='\r\n')
    self.assertEqual(buffer.getvalue(), crlf_bytes)
3252
Ezio Melottib3aedd42010-11-20 19:04:17 +00003253
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003254# Test the iterator/generators
class TestIterators(TestEmailBase):
    """Tests for the email.iterators helpers and feedparser line buffering."""

    def test_body_line_iterator(self):
        """body_line_iterator() yields the body lines of plain and multipart."""
        # First a simple non-multipart message
        simple = self._msgobj('msg_01.txt')
        simple_lines = list(iterators.body_line_iterator(simple))
        self.assertEqual(len(simple_lines), 6)
        self.ndiffAssertEqual(EMPTYSTRING.join(simple_lines),
                              simple.get_payload())
        # Now a more complicated multipart
        nested = self._msgobj('msg_02.txt')
        nested_lines = list(iterators.body_line_iterator(nested))
        self.assertEqual(len(nested_lines), 43)
        with openfile('msg_19.txt') as fp:
            self.ndiffAssertEqual(EMPTYSTRING.join(nested_lines), fp.read())

    def test_typed_subpart_iterator(self):
        """typed_subpart_iterator(msg, 'text') finds both text subparts."""
        msg = self._msgobj('msg_04.txt')
        payloads = [part.get_payload()
                    for part in iterators.typed_subpart_iterator(msg, 'text')]
        self.assertEqual(len(payloads), 2)
        self.assertEqual(
            EMPTYSTRING.join(payloads),
            'a simple kind of mirror\n'
            'to reflect upon our own\n'
            'a simple kind of mirror\n'
            'to reflect upon our own\n')

    def test_typed_subpart_iterator_default_type(self):
        """A part relying on the default content type matches text/plain."""
        msg = self._msgobj('msg_03.txt')
        payloads = [
            part.get_payload()
            for part in iterators.typed_subpart_iterator(msg, 'text', 'plain')]
        self.assertEqual(len(payloads), 1)
        self.assertEqual(
            EMPTYSTRING.join(payloads),
            '\n'
            'Hi,\n'
            '\n'
            'Do you like this message?\n'
            '\n'
            '-Me\n')

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
        line endings.  A CR ending one push() followed by a LF starting
        the next push() added an empty line.
        '''
        # Each entry is (chunk pushed, complete lines readable afterwards).
        chunks = [
            ("a\r \n", 2),
            ("b", 0),
            ("c\n", 1),
            ("", 0),
            ("d\r\n", 1),
            ("e\r", 0),
            ("\nf", 1),
            ("\r\n", 1),
        ]
        from email.feedparser import BufferedSubFile, NeedMoreData
        bsf = BufferedSubFile()
        lines_read = []
        expected_total = 0
        for chunk, expected_count in chunks:
            bsf.push(chunk)
            expected_total += expected_count
            count = 0
            while True:
                line = bsf.readline()
                # NeedMoreData is a sentinel object, so test identity.
                if line is NeedMoreData:
                    break
                lines_read.append(line)
                count += 1
            self.assertEqual(expected_count, count)
        self.assertEqual(len(lines_read), expected_total)
        # Nothing may be lost or invented: output must equal all input.
        self.assertEqual(''.join(chunk for chunk, _ in chunks),
                         ''.join(lines_read))
R. David Murray45bf773f2010-07-17 01:19:57 +00003341
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003342
Ezio Melottib3aedd42010-11-20 19:04:17 +00003343
class TestParsers(TestEmailBase):
    """Tests for Parser, HeaderParser and the message_from_* helpers."""

    # unittest: None removes the length limit on failure diffs.
    maxDiff = None

    def test_header_parser(self):
        """HeaderParser reads the headers and keeps the body as one string."""
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt') as fp:
            msg = HeaderParser().parse(fp)
        self.assertEqual(msg['from'], 'ppp-request@zzz.org')
        self.assertEqual(msg['to'], 'ppp@zzz.org')
        self.assertEqual(msg.get_content_type(), 'multipart/mixed')
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)

    def test_bytes_header_parser(self):
        """BytesHeaderParser behaves likewise and can return bytes."""
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt', 'rb') as fp:
            msg = email.parser.BytesHeaderParser().parse(fp)
        self.assertEqual(msg['from'], 'ppp-request@zzz.org')
        self.assertEqual(msg['to'], 'ppp@zzz.org')
        self.assertEqual(msg.get_content_type(), 'multipart/mixed')
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)
        self.assertIsInstance(msg.get_payload(decode=True), bytes)

    def test_whitespace_continuation(self):
        """A whitespace-only line after a header continues that header."""
        # This message contains a line after the Subject: header that has only
        # whitespace, but it is not empty!
        text = ("From: aperson@dom.ain\n"
                "To: bperson@dom.ain\n"
                "Subject: the next line has a space on it\n"
                " \n"
                "Date: Mon, 8 Apr 2002 15:09:19 -0400\n"
                "Message-ID: spam\n"
                "\n"
                "Here's the message body\n")
        msg = email.message_from_string(text)
        self.assertEqual(msg['subject'],
                         'the next line has a space on it\n ')
        self.assertEqual(msg['message-id'], 'spam')
        self.assertEqual(msg.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        """Same as above with the continued header placed last."""
        # Like the previous test, but the subject line is the last
        # header.
        text = ("From: aperson@dom.ain\n"
                "To: bperson@dom.ain\n"
                "Date: Mon, 8 Apr 2002 15:09:19 -0400\n"
                "Message-ID: spam\n"
                "Subject: the next line has a space on it\n"
                " \n"
                "\n"
                "Here's the message body\n")
        msg = email.message_from_string(text)
        self.assertEqual(msg['subject'],
                         'the next line has a space on it\n ')
        self.assertEqual(msg['message-id'], 'spam')
        self.assertEqual(msg.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        """A CRLF-delimited multipart splits into parts, keeping CRLFs."""
        with openfile('msg_26.txt', newline='\n') as fp:
            msg = Parser().parse(fp)
        self.assertEqual(len(msg.get_payload()), 2)
        text_part = msg.get_payload(0)
        self.assertEqual(text_part.get_content_type(), 'text/plain')
        self.assertEqual(text_part.get_payload(),
                         'Simple email with attachment.\r\n\r\n')
        attachment = msg.get_payload(1)
        self.assertEqual(attachment.get_content_type(), 'application/riscos')

    def test_crlf_flatten(self):
        """Flattening with linesep='\\r\\n' reproduces the CRLF source."""
        # Using newline='\n' preserves the crlfs in this input file.
        with openfile('msg_26.txt', newline='\n') as fp:
            source = fp.read()
        buffer = StringIO()
        Generator(buffer).flatten(email.message_from_string(source),
                                  linesep='\r\n')
        self.assertEqual(buffer.getvalue(), source)

    def test_multipart_digest_with_extra_mime_headers(self):
        """Each digest entry parses as message/rfc822 wrapping text/plain."""
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        self.assertEqual(msg.is_multipart(), 1)
        self.assertEqual(len(msg.get_payload()), 2)
        for position, text in enumerate(('message 1\n', 'message 2\n')):
            wrapper = msg.get_payload(position)
            self.assertEqual(wrapper.get_content_type(), 'message/rfc822')
            self.assertEqual(wrapper.is_multipart(), 1)
            self.assertEqual(len(wrapper.get_payload()), 1)
            inner = wrapper.get_payload(0)
            self.assertEqual(inner.is_multipart(), 0)
            self.assertEqual(inner.get_content_type(), 'text/plain')
            self.ndiffAssertEqual(inner.get_payload(), text)

    def test_three_lines(self):
        """Headers with no trailing newline or body are still parsed."""
        # A bug report by Andrew McNamara
        header_lines = ['From: Andrew Person <aperson@dom.ain',
                        'Subject: Test',
                        'Date: Tue, 20 Aug 2002 16:43:45 +1000']
        msg = email.message_from_string(NL.join(header_lines))
        self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        """CRLF line endings are not left on parsed header values."""
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        first = 'text'
        second = 'more text'
        text = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            first, second)
        msg = email.message_from_string(text)
        self.assertEqual(msg.get('Header'), first)
        self.assertEqual(msg.get('Next-Header'), second)

    def test_rfc2822_header_syntax(self):
        """Unusual printable characters are accepted in header names."""
        text = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(text)
        self.assertEqual(len(msg), 3)
        self.assertEqual(sorted(field for field in msg),
                         ['!"#QUX;~', '>From', 'From'])
        self.assertEqual(msg.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        """A first line with a space before any colon yields no headers."""
        text = ('>From foo@example.com 11:25:53\n'
                'From: bar\n!"#QUX;~: zoo\n\nbody')
        msg = email.message_from_string(text)
        self.assertEqual(len(msg.keys()), 0)

    def test_rfc2822_one_character_header(self):
        """Single-character header names are recognised."""
        text = 'A: first header\nB: second header\nCC: third header\n\nbody'
        msg = email.message_from_string(text)
        self.assertEqual(sorted(msg.keys()), ['A', 'B', 'CC'])
        self.assertEqual(msg.get_payload(), 'body')

    def test_CRLFLF_at_end_of_part(self):
        """A part ending in CRLF then LF keeps its CRLF."""
        # issue 5610: feedparser should not eat two chars from body part ending
        # with "\r\n\n".
        text = (
            "From: foo@bar.com\n"
            "To: baz\n"
            "Mime-Version: 1.0\n"
            "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
            "\n"
            "--BOUNDARY\n"
            "Content-Type: text/plain\n"
            "\n"
            "body ending with CRLF newline\r\n"
            "\n"
            "--BOUNDARY--\n"
        )
        msg = email.message_from_string(text)
        body = msg.get_payload(0).get_payload()
        self.assertTrue(body.endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003519
Ezio Melottib3aedd42010-11-20 19:04:17 +00003520
R. David Murray96fd54e2010-10-08 15:55:28 +00003521class Test8BitBytesHandling(unittest.TestCase):
3522 # In Python3 all input is string, but that doesn't work if the actual input
3523 # uses an 8bit transfer encoding. To hack around that, in email 5.1 we
3524 # decode byte streams using the surrogateescape error handler, and
3525 # reconvert to binary at appropriate places if we detect surrogates. This
3526 # doesn't allow us to transform headers with 8bit bytes (they get munged),
3527 # but it does allow us to parse and preserve them, and to decode body
3528 # parts that use an 8bit CTE.
3529
3530 bodytest_msg = textwrap.dedent("""\
3531 From: foo@bar.com
3532 To: baz
3533 Mime-Version: 1.0
3534 Content-Type: text/plain; charset={charset}
3535 Content-Transfer-Encoding: {cte}
3536
3537 {bodyline}
3538 """)
3539
3540 def test_known_8bit_CTE(self):
3541 m = self.bodytest_msg.format(charset='utf-8',
3542 cte='8bit',
3543 bodyline='pöstal').encode('utf-8')
3544 msg = email.message_from_bytes(m)
3545 self.assertEqual(msg.get_payload(), "pöstal\n")
3546 self.assertEqual(msg.get_payload(decode=True),
3547 "pöstal\n".encode('utf-8'))
3548
3549 def test_unknown_8bit_CTE(self):
3550 m = self.bodytest_msg.format(charset='notavalidcharset',
3551 cte='8bit',
3552 bodyline='pöstal').encode('utf-8')
3553 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00003554 self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n")
R. David Murray96fd54e2010-10-08 15:55:28 +00003555 self.assertEqual(msg.get_payload(decode=True),
3556 "pöstal\n".encode('utf-8'))
3557
3558 def test_8bit_in_quopri_body(self):
3559 # This is non-RFC compliant data...without 'decode' the library code
3560 # decodes the body using the charset from the headers, and because the
3561 # source byte really is utf-8 this works. This is likely to fail
3562 # against real dirty data (ie: produce mojibake), but the data is
3563 # invalid anyway so it is as good a guess as any. But this means that
3564 # this test just confirms the current behavior; that behavior is not
3565 # necessarily the best possible behavior. With 'decode' it is
3566 # returning the raw bytes, so that test should be of correct behavior,
3567 # or at least produce the same result that email4 did.
3568 m = self.bodytest_msg.format(charset='utf-8',
3569 cte='quoted-printable',
3570 bodyline='p=C3=B6stál').encode('utf-8')
3571 msg = email.message_from_bytes(m)
3572 self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
3573 self.assertEqual(msg.get_payload(decode=True),
3574 'pöstál\n'.encode('utf-8'))
3575
3576 def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
3577 # This is similar to the previous test, but proves that if the 8bit
3578 # byte is undecodeable in the specified charset, it gets replaced
3579 # by the unicode 'unknown' character. Again, this may or may not
3580 # be the ideal behavior. Note that if decode=False none of the
3581 # decoders will get involved, so this is the only test we need
3582 # for this behavior.
3583 m = self.bodytest_msg.format(charset='ascii',
3584 cte='quoted-printable',
3585 bodyline='p=C3=B6stál').encode('utf-8')
3586 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00003587 self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
R. David Murray96fd54e2010-10-08 15:55:28 +00003588 self.assertEqual(msg.get_payload(decode=True),
3589 'pöstál\n'.encode('utf-8'))
3590
R David Murray80e0aee2012-05-27 21:23:34 -04003591 # test_defect_handling:test_invalid_chars_in_base64_payload
R. David Murray96fd54e2010-10-08 15:55:28 +00003592 def test_8bit_in_base64_body(self):
R David Murray80e0aee2012-05-27 21:23:34 -04003593 # If we get 8bit bytes in a base64 body, we can just ignore them
3594 # as being outside the base64 alphabet and decode anyway. But
3595 # we register a defect.
R. David Murray96fd54e2010-10-08 15:55:28 +00003596 m = self.bodytest_msg.format(charset='utf-8',
3597 cte='base64',
3598 bodyline='cMO2c3RhbAá=').encode('utf-8')
3599 msg = email.message_from_bytes(m)
3600 self.assertEqual(msg.get_payload(decode=True),
R David Murray80e0aee2012-05-27 21:23:34 -04003601 'pöstal'.encode('utf-8'))
3602 self.assertIsInstance(msg.defects[0],
3603 errors.InvalidBase64CharactersDefect)
R. David Murray96fd54e2010-10-08 15:55:28 +00003604
3605 def test_8bit_in_uuencode_body(self):
3606 # Sticking an 8bit byte in a uuencode block makes it undecodable by
3607 # normal means, so the block is returned undecoded, but as bytes.
3608 m = self.bodytest_msg.format(charset='utf-8',
3609 cte='uuencode',
3610 bodyline='<,.V<W1A; á ').encode('utf-8')
3611 msg = email.message_from_bytes(m)
3612 self.assertEqual(msg.get_payload(decode=True),
3613 '<,.V<W1A; á \n'.encode('utf-8'))
3614
3615
R. David Murray92532142011-01-07 23:25:30 +00003616 headertest_headers = (
3617 ('From: foo@bar.com', ('From', 'foo@bar.com')),
3618 ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
3619 ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
3620 '\tJean de Baddie',
3621 ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
3622 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
3623 ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
3624 ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
3625 )
3626 headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) +
3627 '\nYes, they are flying.\n').encode('utf-8')
R. David Murray96fd54e2010-10-08 15:55:28 +00003628
3629 def test_get_8bit_header(self):
3630 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003631 self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz')
3632 self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz')
R. David Murray96fd54e2010-10-08 15:55:28 +00003633
3634 def test_print_8bit_headers(self):
3635 msg = email.message_from_bytes(self.headertest_msg)
3636 self.assertEqual(str(msg),
R. David Murray92532142011-01-07 23:25:30 +00003637 textwrap.dedent("""\
3638 From: {}
3639 To: {}
3640 Subject: {}
3641 From: {}
3642
3643 Yes, they are flying.
3644 """).format(*[expected[1] for (_, expected) in
3645 self.headertest_headers]))
R. David Murray96fd54e2010-10-08 15:55:28 +00003646
3647 def test_values_with_8bit_headers(self):
3648 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003649 self.assertListEqual([str(x) for x in msg.values()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003650 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003651 'b\uFFFD\uFFFDz',
3652 'Maintenant je vous pr\uFFFD\uFFFDsente mon '
3653 'coll\uFFFD\uFFFDgue, le pouf '
3654 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
R. David Murray96fd54e2010-10-08 15:55:28 +00003655 '\tJean de Baddie',
R. David Murray92532142011-01-07 23:25:30 +00003656 "g\uFFFD\uFFFDst"])
R. David Murray96fd54e2010-10-08 15:55:28 +00003657
3658 def test_items_with_8bit_headers(self):
3659 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003660 self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003661 [('From', 'foo@bar.com'),
R. David Murray92532142011-01-07 23:25:30 +00003662 ('To', 'b\uFFFD\uFFFDz'),
3663 ('Subject', 'Maintenant je vous '
3664 'pr\uFFFD\uFFFDsente '
3665 'mon coll\uFFFD\uFFFDgue, le pouf '
3666 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
3667 '\tJean de Baddie'),
3668 ('From', 'g\uFFFD\uFFFDst')])
R. David Murray96fd54e2010-10-08 15:55:28 +00003669
3670 def test_get_all_with_8bit_headers(self):
3671 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003672 self.assertListEqual([str(x) for x in msg.get_all('from')],
R. David Murray96fd54e2010-10-08 15:55:28 +00003673 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003674 'g\uFFFD\uFFFDst'])
R. David Murray96fd54e2010-10-08 15:55:28 +00003675
R David Murraya2150232011-03-16 21:11:23 -04003676 def test_get_content_type_with_8bit(self):
3677 msg = email.message_from_bytes(textwrap.dedent("""\
3678 Content-Type: text/pl\xA7in; charset=utf-8
3679 """).encode('latin-1'))
3680 self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin")
3681 self.assertEqual(msg.get_content_maintype(), "text")
3682 self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin")
3683
R David Murray97f43c02012-06-24 05:03:27 -04003684 # test_headerregistry.TestContentTypeHeader.non_ascii_in_params
R David Murraya2150232011-03-16 21:11:23 -04003685 def test_get_params_with_8bit(self):
3686 msg = email.message_from_bytes(
3687 'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1'))
3688 self.assertEqual(msg.get_params(header='x-header'),
3689 [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])
3690 self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne')
3691 # XXX: someday you might be able to get 'b\xa7r', for now you can't.
3692 self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None)
3693
R David Murray97f43c02012-06-24 05:03:27 -04003694 # test_headerregistry.TestContentTypeHeader.non_ascii_in_rfc2231_value
R David Murraya2150232011-03-16 21:11:23 -04003695 def test_get_rfc2231_params_with_8bit(self):
3696 msg = email.message_from_bytes(textwrap.dedent("""\
3697 Content-Type: text/plain; charset=us-ascii;
3698 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3699 ).encode('latin-1'))
3700 self.assertEqual(msg.get_param('title'),
3701 ('us-ascii', 'en', 'This is not f\uFFFDn'))
3702
3703 def test_set_rfc2231_params_with_8bit(self):
3704 msg = email.message_from_bytes(textwrap.dedent("""\
3705 Content-Type: text/plain; charset=us-ascii;
3706 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3707 ).encode('latin-1'))
3708 msg.set_param('title', 'test')
3709 self.assertEqual(msg.get_param('title'), 'test')
3710
3711 def test_del_rfc2231_params_with_8bit(self):
3712 msg = email.message_from_bytes(textwrap.dedent("""\
3713 Content-Type: text/plain; charset=us-ascii;
3714 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3715 ).encode('latin-1'))
3716 msg.del_param('title')
3717 self.assertEqual(msg.get_param('title'), None)
3718 self.assertEqual(msg.get_content_maintype(), 'text')
3719
3720 def test_get_payload_with_8bit_cte_header(self):
3721 msg = email.message_from_bytes(textwrap.dedent("""\
3722 Content-Transfer-Encoding: b\xa7se64
3723 Content-Type: text/plain; charset=latin-1
3724
3725 payload
3726 """).encode('latin-1'))
3727 self.assertEqual(msg.get_payload(), 'payload\n')
3728 self.assertEqual(msg.get_payload(decode=True), b'payload\n')
3729
# UTF-8 encoded message with non-ASCII text in the To and Subject headers
# (the Subject is folded onto a TAB-indented continuation line) and an
# 8bit Cyrillic body.
non_latin_bin_msg = '\n'.join([
    'From: foo@bar.com',
    'To: báz',
    'Subject: Maintenant je vous présente mon collègue, le pouf célèbre',
    '\tJean de Baddie',
    'Mime-Version: 1.0',
    'Content-Type: text/plain; charset="utf-8"',
    'Content-Transfer-Encoding: 8bit',
    '',
    'Да, они летят.',
    '',
]).encode('utf-8')
3741
3742 def test_bytes_generator(self):
3743 msg = email.message_from_bytes(self.non_latin_bin_msg)
3744 out = BytesIO()
3745 email.generator.BytesGenerator(out).flatten(msg)
3746 self.assertEqual(out.getvalue(), self.non_latin_bin_msg)
3747
R. David Murray7372a072011-01-26 21:21:32 +00003748 def test_bytes_generator_handles_None_body(self):
3749 #Issue 11019
3750 msg = email.message.Message()
3751 out = BytesIO()
3752 email.generator.BytesGenerator(out).flatten(msg)
3753 self.assertEqual(out.getvalue(), b"\n")
3754
# Expected str rendering of non_latin_bin_msg: the 8-bit headers become
# unknown-8bit encoded words (Subject folded over three lines) and the
# body is transferred as base64.
non_latin_bin_msg_as7bit_wrapped = '\n'.join([
    'From: foo@bar.com',
    'To: =?unknown-8bit?q?b=C3=A1z?=',
    'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=',
    ' =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=',
    ' =?unknown-8bit?q?_Jean_de_Baddie?=',
    'Mime-Version: 1.0',
    'Content-Type: text/plain; charset="utf-8"',
    'Content-Transfer-Encoding: base64',
    '',
    '0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==',
    '',
])
3767
3768 def test_generator_handles_8bit(self):
3769 msg = email.message_from_bytes(self.non_latin_bin_msg)
3770 out = StringIO()
3771 email.generator.Generator(out).flatten(msg)
R. David Murray92532142011-01-07 23:25:30 +00003772 self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped)
R. David Murray96fd54e2010-10-08 15:55:28 +00003773
3774 def test_bytes_generator_with_unix_from(self):
3775 # The unixfrom contains a current date, so we can't check it
3776 # literally. Just make sure the first word is 'From' and the
3777 # rest of the message matches the input.
3778 msg = email.message_from_bytes(self.non_latin_bin_msg)
3779 out = BytesIO()
3780 email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
3781 lines = out.getvalue().split(b'\n')
3782 self.assertEqual(lines[0].split()[0], b'From')
3783 self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)
3784
# Same as non_latin_bin_msg_as7bit_wrapped, except that the first two
# physical Subject lines (list items 2 and 3) are merged into a single
# unfolded encoded word; the ' ..._Jean_de_Baddie' continuation stays.
non_latin_bin_msg_as7bit = '\n'.join(
    non_latin_bin_msg_as7bit_wrapped.split('\n')[:2]
    + ['Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
       'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
    + non_latin_bin_msg_as7bit_wrapped.split('\n')[4:])
3790
def test_message_from_binary_file(self):
    # Write the 8-bit message to a file, parse it back through a binary
    # file object with BytesParser, and check its str rendering.
    path = 'test.msg'
    # Registered before the file is created so it is removed even if a
    # later step fails.
    self.addCleanup(unlink, path)
    with open(path, 'wb') as sink:
        sink.write(self.non_latin_bin_msg)
    with open(path, 'rb') as source:
        parsed = email.parser.BytesParser().parse(source)
    self.assertEqual(str(parsed), self.non_latin_bin_msg_as7bit)
3799
# Latin-1 encoded message: ASCII-only headers and an 8bit body.
latin_bin_msg = '\n'.join([
    'From: foo@bar.com',
    'To: Dinsdale',
    'Subject: Nudge nudge, wink, wink',
    'Mime-Version: 1.0',
    'Content-Type: text/plain; charset="latin-1"',
    'Content-Transfer-Encoding: 8bit',
    '',
    'oh là là, know what I mean, know what I mean?',
    '',
]).encode('latin-1')
3810
# Expected str rendering of latin_bin_msg: the charset is spelled
# iso-8859-1 and the body is transferred as quoted-printable.
latin_bin_msg_as7bit = '\n'.join([
    'From: foo@bar.com',
    'To: Dinsdale',
    'Subject: Nudge nudge, wink, wink',
    'Mime-Version: 1.0',
    'Content-Type: text/plain; charset="iso-8859-1"',
    'Content-Transfer-Encoding: quoted-printable',
    '',
    'oh l=E0 l=E0, know what I mean, know what I mean?',
    '',
])
3821
3822 def test_string_generator_reencodes_to_quopri_when_appropriate(self):
3823 m = email.message_from_bytes(self.latin_bin_msg)
3824 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3825
3826 def test_decoded_generator_emits_unicode_body(self):
3827 m = email.message_from_bytes(self.latin_bin_msg)
3828 out = StringIO()
3829 email.generator.DecodedGenerator(out).flatten(m)
3830 #DecodedHeader output contains an extra blank line compared
3831 #to the input message. RDM: not sure if this is a bug or not,
3832 #but it is not specific to the 8bit->7bit conversion.
3833 self.assertEqual(out.getvalue(),
3834 self.latin_bin_msg.decode('latin-1')+'\n')
3835
3836 def test_bytes_feedparser(self):
3837 bfp = email.feedparser.BytesFeedParser()
3838 for i in range(0, len(self.latin_bin_msg), 10):
3839 bfp.feed(self.latin_bin_msg[i:i+10])
3840 m = bfp.close()
3841 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3842
def test_crlf_flatten(self):
    # Flattening msg_26.txt with linesep='\r\n' must reproduce the file's
    # bytes exactly.
    with openfile('msg_26.txt', 'rb') as fp:
        original = fp.read()
    parsed = email.message_from_bytes(original)
    sink = BytesIO()
    email.generator.BytesGenerator(sink).flatten(parsed, linesep='\r\n')
    self.assertEqual(sink.getvalue(), original)
R David Murrayc5c14722011-04-06 08:13:02 -04003851
3852 def test_8bit_multipart(self):
3853 # Issue 11605
3854 source = textwrap.dedent("""\
3855 Date: Fri, 18 Mar 2011 17:15:43 +0100
3856 To: foo@example.com
3857 From: foodwatch-Newsletter <bar@example.com>
3858 Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System
3859 Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain>
3860 MIME-Version: 1.0
3861 Content-Type: multipart/alternative;
3862 boundary="b1_76a486bee62b0d200f33dc2ca08220ad"
3863
3864 --b1_76a486bee62b0d200f33dc2ca08220ad
3865 Content-Type: text/plain; charset="utf-8"
3866 Content-Transfer-Encoding: 8bit
3867
3868 Guten Tag, ,
3869
3870 mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die
3871 Nachrichten aus Japan.
3872
3873
3874 --b1_76a486bee62b0d200f33dc2ca08220ad
3875 Content-Type: text/html; charset="utf-8"
3876 Content-Transfer-Encoding: 8bit
3877
3878 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
3879 "http://www.w3.org/TR/html4/loose.dtd">
3880 <html lang="de">
3881 <head>
3882 <title>foodwatch - Newsletter</title>
3883 </head>
3884 <body>
3885 <p>mit gro&szlig;er Betroffenheit verfolgen auch wir im foodwatch-Team
3886 die Nachrichten aus Japan.</p>
3887 </body>
3888 </html>
3889 --b1_76a486bee62b0d200f33dc2ca08220ad--
3890
3891 """).encode('utf-8')
3892 msg = email.message_from_bytes(source)
3893 s = BytesIO()
3894 g = email.generator.BytesGenerator(s)
3895 g.flatten(msg)
3896 self.assertEqual(s.getvalue(), source)
3897
R David Murray9fd170e2012-03-14 14:05:03 -04003898 def test_bytes_generator_b_encoding_linesep(self):
3899 # Issue 14062: b encoding was tacking on an extra \n.
3900 m = Message()
3901 # This has enough non-ascii that it should always end up b encoded.
3902 m['Subject'] = Header('žluťoučký kůň')
3903 s = BytesIO()
3904 g = email.generator.BytesGenerator(s)
3905 g.flatten(m, linesep='\r\n')
3906 self.assertEqual(
3907 s.getvalue(),
3908 b'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
3909
3910 def test_generator_b_encoding_linesep(self):
3911 # Since this broke in ByteGenerator, test Generator for completeness.
3912 m = Message()
3913 # This has enough non-ascii that it should always end up b encoded.
3914 m['Subject'] = Header('žluťoučký kůň')
3915 s = StringIO()
3916 g = email.generator.Generator(s)
3917 g.flatten(m, linesep='\r\n')
3918 self.assertEqual(
3919 s.getvalue(),
3920 'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
3921
# unittest: None removes the length cap on failure diffs, so long message
# comparisons are shown in full.
maxDiff = None
3923
Ezio Melottib3aedd42010-11-20 19:04:17 +00003924
class BaseTestBytesGeneratorIdempotent:
    # Mixin providing bytes-based _msgobj/_idempotent helpers.  Concrete
    # subclasses must define linesep, blinesep and normalize_linesep_regex.

    maxDiff = None

    def _msgobj(self, filename):
        # Read a fixture, rewrite its line endings to this class's
        # convention, and return (parsed message, normalized bytes).
        with openfile(filename, 'rb') as fp:
            raw = fp.read()
        normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(normalized), normalized

    def _idempotent(self, msg, data, unixfrom=False):
        # Flatten msg without header re-wrapping (maxheaderlen=0) and
        # require the output to equal data byte for byte.
        sink = BytesIO()
        generator = email.generator.BytesGenerator(sink, maxheaderlen=0)
        generator.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertEqual(data, sink.getvalue())
R. David Murray96fd54e2010-10-08 15:55:28 +00003941
3942
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                     TestIdempotent):
    # LF flavour: CRLF sequences in the fixture data are rewritten to a
    # bare LF, and messages are flattened with '\n'.
    normalize_linesep_regex = re.compile(br'\r\n')
    blinesep = b'\n'
    linesep = '\n'
3948
3949
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    # CRLF flavour: every LF not already preceded by CR is rewritten to
    # CRLF, and messages are flattened with '\r\n'.
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
    blinesep = b'\r\n'
    linesep = '\r\n'
3955
Ezio Melottib3aedd42010-11-20 19:04:17 +00003956
class TestBase64(unittest.TestCase):
    # Tests for the helper functions in email.base64mime.

    def test_len(self):
        eq = self.assertEqual
        # header_length() must agree with the size of a real encoding.
        eq(base64mime.header_length('hello'),
           len(base64mime.body_encode(b'hello', eol='')))
        for size in range(15):
            # Every started group of 3 input bytes becomes 4 output
            # characters (0 -> 0, 1..3 -> 4, 4..6 -> 8, ...).
            eq(base64mime.header_length('x' * size), 4 * ((size + 2) // 3))

    def test_decode(self):
        eq = self.assertEqual
        eq(base64mime.decode(''), b'')
        eq(base64mime.decode('aGVsbG8='), b'hello')

    def test_encode(self):
        eq = self.assertEqual
        # Empty input must give an empty result.  Only emptiness is
        # checked: whether that empty value is bytes or str is an
        # implementation detail, not part of what is being tested here.
        eq(len(base64mime.body_encode(b'')), 0)
        eq(base64mime.body_encode(b'hello'), 'aGVsbG8=\n')
        # A newline in the input is encoded like any other byte
        eq(base64mime.body_encode(b'hello\n'), 'aGVsbG8K\n')
        # 100 input bytes at maxlinelen=40 give three full 40-character
        # lines followed by one short line.
        wrapped = ['eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg'] * 3
        wrapped.append('eHh4eCB4eHh4IA==')
        # Test the maxlinelen arg
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40),
           ''.join(line + '\n' for line in wrapped))
        # Test the eol argument
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           ''.join(line + '\r\n' for line in wrapped))

    def test_header_encode(self):
        eq = self.assertEqual
        he = base64mime.header_encode
        eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        eq(he('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # Test the charset option
        eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004007
4008
Ezio Melottib3aedd42010-11-20 19:04:17 +00004009
class TestQuopri(unittest.TestCase):
    # Tests for the helper functions in email.quoprimime.

    def setUp(self):
        # Set of characters (as byte integers) that don't need to be encoded
        # in headers.
        self.hlit = list(chain(
            range(ord('a'), ord('z') + 1),
            range(ord('A'), ord('Z') + 1),
            range(ord('0'), ord('9') + 1),
            b'!*+-/'))
        # Set of characters (as byte integers) that do need to be encoded in
        # headers.
        header_literals = set(self.hlit)
        self.hnon = [c for c in range(256) if c not in header_literals]
        # Checked with assertEqual rather than a bare assert so the sanity
        # check still runs under ``python -O``.
        self.assertEqual(len(self.hlit) + len(self.hnon), 256)
        # Set of characters (as byte integers) that don't need to be encoded
        # in bodies: printable ASCII except '=', plus TAB.
        self.blit = [c for c in range(ord(' '), ord('~') + 1)
                     if c != ord('=')]
        self.blit.append(ord('\t'))
        # Set of characters (as byte integers) that do need to be encoded in
        # bodies.
        body_literals = set(self.blit)
        self.bnon = [c for c in range(256) if c not in body_literals]
        self.assertEqual(len(self.blit) + len(self.bnon), 256)

    def test_quopri_header_check(self):
        for c in self.hlit:
            self.assertFalse(quoprimime.header_check(c),
                        'Should not be header quopri encoded: %s' % chr(c))
        for c in self.hnon:
            self.assertTrue(quoprimime.header_check(c),
                            'Should be header quopri encoded: %s' % chr(c))

    def test_quopri_body_check(self):
        for c in self.blit:
            self.assertFalse(quoprimime.body_check(c),
                        'Should not be body quopri encoded: %s' % chr(c))
        for c in self.bnon:
            self.assertTrue(quoprimime.body_check(c),
                            'Should be body quopri encoded: %s' % chr(c))

    def test_header_quopri_len(self):
        eq = self.assertEqual
        eq(quoprimime.header_length(b'hello'), 5)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
           quoprimime.header_length(b'hello') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
           quoprimime.header_length(b'h@e@l@l@o@') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        for c in self.hlit:
            eq(quoprimime.header_length(bytes([c])), 1,
               'expected length 1 for %r' % chr(c))
        for c in self.hnon:
            # Space is special; it's encoded to _
            if c == ord(' '):
                continue
            eq(quoprimime.header_length(bytes([c])), 3,
               'expected length 3 for %r' % chr(c))
        eq(quoprimime.header_length(b' '), 1)

    def test_body_quopri_len(self):
        eq = self.assertEqual
        for c in self.blit:
            eq(quoprimime.body_length(bytes([c])), 1)
        for c in self.bnon:
            eq(quoprimime.body_length(bytes([c])), 3)

    def test_quote_unquote_idempotent(self):
        for code in range(256):
            char = chr(code)
            self.assertEqual(quoprimime.unquote(quoprimime.quote(char)), char)

    def _test_header_encode(self, header, expected_encoded_header, charset=None):
        # Pass charset only when the caller gave one, so that
        # header_encode's own default is exercised otherwise.
        extra = () if charset is None else (charset,)
        encoded_header = quoprimime.header_encode(header, *extra)
        self.assertEqual(encoded_header, expected_encoded_header)

    def test_header_encode_null(self):
        self._test_header_encode(b'', '')

    def test_header_encode_one_word(self):
        self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=')

    def test_header_encode_two_lines(self):
        self._test_header_encode(b'hello\nworld',
                                 '=?iso-8859-1?q?hello=0Aworld?=')

    def test_header_encode_non_ascii(self):
        self._test_header_encode(b'hello\xc7there',
                                 '=?iso-8859-1?q?hello=C7there?=')

    def test_header_encode_alt_charset(self):
        self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
                                 charset='iso-8859-2')

    def _test_header_decode(self, encoded_header, expected_decoded_header):
        decoded_header = quoprimime.header_decode(encoded_header)
        self.assertEqual(decoded_header, expected_decoded_header)

    def test_header_decode_null(self):
        self._test_header_decode('', '')

    def test_header_decode_one_word(self):
        self._test_header_decode('hello', 'hello')

    def test_header_decode_two_lines(self):
        self._test_header_decode('hello=0Aworld', 'hello\nworld')

    def test_header_decode_non_ascii(self):
        self._test_header_decode('hello=C7there', 'hello\xc7there')

    def test_header_decode_re_bug_18380(self):
        # Issue 18380: Call re.sub with a positional argument for flags in the wrong position
        self.assertEqual(quoprimime.header_decode('=30' * 257), '0' * 257)

    def _test_decode(self, encoded, expected_decoded, eol=None):
        # Pass eol only when the caller gave one, so that decode's own
        # default is exercised otherwise.
        kwargs = {} if eol is None else {'eol': eol}
        decoded = quoprimime.decode(encoded, **kwargs)
        self.assertEqual(decoded, expected_decoded)

    def test_decode_null_word(self):
        self._test_decode('', '')

    def test_decode_null_line_null_word(self):
        self._test_decode('\r\n', '\n')

    def test_decode_one_word(self):
        self._test_decode('hello', 'hello')

    def test_decode_one_word_eol(self):
        self._test_decode('hello', 'hello', eol='X')

    def test_decode_one_line(self):
        self._test_decode('hello\r\n', 'hello\n')

    def test_decode_one_line_lf(self):
        self._test_decode('hello\n', 'hello\n')

    def test_decode_one_line_cr(self):
        self._test_decode('hello\r', 'hello\n')

    def test_decode_one_line_nl(self):
        self._test_decode('hello\n', 'helloX', eol='X')

    def test_decode_one_line_crnl(self):
        self._test_decode('hello\r\n', 'helloX', eol='X')

    def test_decode_one_line_one_word(self):
        self._test_decode('hello\r\nworld', 'hello\nworld')

    def test_decode_one_line_one_word_eol(self):
        self._test_decode('hello\r\nworld', 'helloXworld', eol='X')

    def test_decode_two_lines(self):
        self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n')

    def test_decode_two_lines_eol(self):
        self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X')

    def test_decode_one_long_line(self):
        self._test_decode('Spam' * 250, 'Spam' * 250)

    def test_decode_one_space(self):
        self._test_decode(' ', '')

    def test_decode_multiple_spaces(self):
        self._test_decode(' ' * 5, '')

    def test_decode_one_line_trailing_spaces(self):
        self._test_decode('hello    \r\n', 'hello\n')

    def test_decode_two_lines_trailing_spaces(self):
        self._test_decode('hello    \r\nworld   \r\n', 'hello\nworld\n')

    def test_decode_quoted_word(self):
        self._test_decode('=22quoted=20words=22', '"quoted words"')

    def test_decode_uppercase_quoting(self):
        self._test_decode('ab=CD=EF', 'ab\xcd\xef')

    def test_decode_lowercase_quoting(self):
        self._test_decode('ab=cd=ef', 'ab\xcd\xef')

    def test_decode_soft_line_break(self):
        self._test_decode('soft line=\r\nbreak', 'soft linebreak')

    def test_decode_false_quoting(self):
        self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2')

    def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
        # Forward only the arguments the caller supplied; anything left
        # out falls back to body_encode's defaults (mirrored below as 76
        # and '\n' for the line-length check).
        kwargs = {}
        if maxlinelen is not None:
            kwargs['maxlinelen'] = maxlinelen
        if eol is not None:
            kwargs['eol'] = eol
        encoded_body = quoprimime.body_encode(body, **kwargs)
        self.assertEqual(encoded_body, expected_encoded_body)
        if kwargs.get('eol', '\n') in ('\n', '\r\n'):
            # We know how to split the result back into lines, so maxlinelen
            # can be checked.
            limit = kwargs.get('maxlinelen', 76)
            for line in encoded_body.splitlines():
                self.assertLessEqual(len(line), limit)

    def test_encode_null(self):
        self._test_encode('', '')

    def test_encode_null_lines(self):
        self._test_encode('\n\n', '\n\n')

    def test_encode_one_line(self):
        self._test_encode('hello\n', 'hello\n')

    def test_encode_one_line_crlf(self):
        self._test_encode('hello\r\n', 'hello\n')

    def test_encode_one_line_eol(self):
        self._test_encode('hello\n', 'hello\r\n', eol='\r\n')

    def test_encode_one_space(self):
        self._test_encode(' ', '=20')

    def test_encode_one_line_one_space(self):
        self._test_encode(' \n', '=20\n')

    # XXX: body_encode() expects strings, but uses ord(char) from these
    # strings to index into a 256-entry list.  For code points above 255,
    # this will fail.  Should there be a check for 8-bit only ord() values
    # in body, or at least a comment about the expected input?

    def test_encode_two_lines_one_space(self):
        self._test_encode(' \n \n', '=20\n=20\n')

    def test_encode_one_word_trailing_spaces(self):
        self._test_encode('hello   ', 'hello  =20')

    def test_encode_one_line_trailing_spaces(self):
        self._test_encode('hello   \n', 'hello  =20\n')

    def test_encode_one_word_trailing_tab(self):
        self._test_encode('hello  \t', 'hello  =09')

    def test_encode_one_line_trailing_tab(self):
        self._test_encode('hello  \t\n', 'hello  =09\n')

    def test_encode_trailing_space_before_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6)

    def test_encode_trailing_space_at_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5)

    def test_encode_trailing_space_beyond_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4)

    def test_encode_whitespace_lines(self):
        self._test_encode(' \n' * 5, '=20\n' * 5)

    def test_encode_quoted_equals(self):
        self._test_encode('a = b', 'a =3D b')

    def test_encode_one_long_string(self):
        self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25)

    def test_encode_one_long_line(self):
        self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')

    def test_encode_one_very_long_line(self):
        self._test_encode('x' * 200 + '\n',
                2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n')

    def test_encode_shortest_maxlinelen(self):
        self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4)

    def test_encode_maxlinelen_too_small(self):
        self.assertRaises(ValueError, self._test_encode, '', '', maxlinelen=3)

    def test_encode(self):
        eq = self.assertEqual
        eq(quoprimime.body_encode(''), '')
        eq(quoprimime.body_encode('hello'), 'hello')
        # CRLF in the input is normalized to the default eol
        eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
        # Test the maxlinelen arg
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40),
           'xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\n'
           ' xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\n'
           'x xxxx xxxx xxxx xxxx=20')
        # Test the eol argument
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           'xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r\n'
           ' xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r\n'
           'x xxxx xxxx xxxx xxxx=20')
        eq(quoprimime.body_encode('one line\n\ntwo line'),
           'one line\n\ntwo line')
4323
4324
Ezio Melottib3aedd42010-11-20 19:04:17 +00004325
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004326# Test the Charset class
class TestCharset(unittest.TestCase):
    def tearDown(self):
        # test_body_encode registers a throwaway 'fake' charset; remove it
        # again if it is there.
        from email import charset as CharsetModule
        CharsetModule.CHARSETS.pop('fake', None)

    def test_codec_encodeable(self):
        # Make sure us-ascii = no Unicode conversion
        ascii_charset = Charset('us-ascii')
        self.assertEqual(ascii_charset.header_encode('Hello World!'),
                         'Hello World!')
        # Test 8-bit idempotency with us-ascii
        eight_bit = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
        self.assertRaises(UnicodeError, ascii_charset.header_encode, eight_bit)
        utf8_charset = Charset('utf-8')
        self.assertEqual(utf8_charset.header_encode(eight_bit),
                         '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')

    def test_body_encode(self):
        # Try a charset with QP body encoding
        self.assertEqual('hello w=F6rld',
                         Charset('iso-8859-1').body_encode('hello w\xf6rld'))
        # Try a charset with Base64 body encoding
        self.assertEqual('aGVsbG8gd29ybGQ=\n',
                         Charset('utf-8').body_encode(b'hello world'))
        # Try a charset with None body encoding
        self.assertEqual('hello world',
                         Charset('us-ascii').body_encode('hello world'))
        # Try the convert argument, where input codec != output codec
        c = Charset('euc-jp')
        # With apologies to Tokio Kikuchi ;)
        # XXX FIXME
##         try:
##             eq('\x1b$B5FCO;~IW\x1b(B',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
##             eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
##         except LookupError:
##             # We probably don't have the Japanese codecs installed
##             pass
        # Testing SF bug #625509, which we have to fake, since there are no
        # built-in encodings where the header encoding is QP but the body
        # encoding is not.
        from email import charset as CharsetModule
        CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8')
        fake_charset = Charset('fake')
        self.assertEqual('hello world', fake_charset.body_encode('hello world'))

    def test_unicode_charset_name(self):
        ascii_charset = Charset('us-ascii')
        self.assertEqual(str(ascii_charset), 'us-ascii')
        # A charset name that is not pure ASCII is rejected.
        self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
4381
4382
Ezio Melottib3aedd42010-11-20 19:04:17 +00004383
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004384# Test multilingual MIME headers.
class TestHeader(TestEmailBase):
    # Covers Header construction, append(), encode() folding, str()
    # conversion, and round trips through decode_header()/make_header().

    def test_simple(self):
        header = Header('Hello World!')
        self.ndiffAssertEqual(header.encode(), 'Hello World!')
        # The appended chunk brings its own leading space, and (as
        # test_simple_surprise shows) chunks are already separated by one
        # space, so two spaces are expected between the sentences.
        header.append(' Goodbye World!')
        self.ndiffAssertEqual(header.encode(), 'Hello World!  Goodbye World!')

    def test_simple_surprise(self):
        # The "surprise": a separating space appears even though the
        # appended chunk has no leading whitespace.
        header = Header('Hello World!')
        self.ndiffAssertEqual(header.encode(), 'Hello World!')
        header.append('Goodbye World!')
        self.ndiffAssertEqual(header.encode(), 'Hello World! Goodbye World!')

    def test_header_needs_no_decoding(self):
        plain = 'no decoding needed'
        self.assertEqual(decode_header(plain), [(plain, None)])

    def test_long(self):
        text = (
            "I am the very model of a modern Major-General; "
            "I've information vegetable, animal, and mineral; "
            "I know the kings of England, and I quote the fights historical "
            "from Marathon to Waterloo, in order categorical; "
            "I'm very well acquainted, too, with matters mathematical; "
            "I understand equations, both the simple and quadratical; "
            "about binomial theorem I'm teeming with a lot o' news, "
            "with many cheerful facts about the square of the hypotenuse.")
        header = Header(text, maxlinelen=76)
        # Folded lines are separated by a newline plus one continuation
        # space; no resulting line may exceed the requested maximum.
        for line in header.encode(splitchars=' ').split('\n '):
            self.assertLessEqual(len(line), 76)

    def test_multilingual(self):
        latin1 = Charset("iso-8859-1")
        latin2 = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        g_head = (b'Die Mieter treten hier ein werden mit einem '
                  b'Foerderband komfortabel den Korridor entlang, '
                  b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
                  b'gegen die rotierenden Klingen bef\xf6rdert. ')
        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        header = Header(g_head, latin1)
        header.append(cz_head, latin2)
        header.append(utf8_head, utf8)
        encoded = header.encode(maxlinelen=76)
        self.ndiffAssertEqual(encoded, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
        # Decoding must give back one (bytes, charset) pair per charset run.
        decoded = decode_header(encoded)
        self.ndiffAssertEqual(len(decoded), 3)
        self.ndiffAssertEqual(decoded[0], (g_head, 'iso-8859-1'))
        self.ndiffAssertEqual(decoded[1], (cz_head, 'iso-8859-2'))
        self.ndiffAssertEqual(decoded[2],
                              (utf8_head.encode('utf-8'), 'utf-8'))
        # str() yields the fully decoded text of all three chunks.
        expected_text = (
            b'Die Mieter treten hier ein werden mit einem Foerderband '
            b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
            b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
            b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
            b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
            b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
            b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
            b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
            b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
            b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
            b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
            b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
            b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
            b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
            b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
            b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
        ).decode('utf-8')
        self.ndiffAssertEqual(str(header), expected_text)
        # Test make_header()
        rebuilt = make_header(decode_header(encoded))
        self.ndiffAssertEqual(rebuilt, header)

    def test_empty_header_encode(self):
        self.assertEqual(Header().encode(), '')

    def test_header_ctor_default_args(self):
        header = Header()
        self.ndiffAssertEqual(header, '')
        header.append('foo', Charset('iso-8859-1'))
        self.ndiffAssertEqual(header, 'foo')

    def test_explicit_maxlinelen(self):
        hstr = ('A very long line that must get split to something other '
                'than at the 76th character boundary to test the non-default '
                'behavior')
        # Default settings.
        header = Header(hstr)
        self.ndiffAssertEqual(header.encode(), '''\
A very long line that must get split to something other than at the 76th
 character boundary to test the non-default behavior''')
        self.ndiffAssertEqual(str(header), hstr)
        # Supplying a header name moves the first fold point one word earlier.
        header = Header(hstr, header_name='Subject')
        self.ndiffAssertEqual(header.encode(), '''\
A very long line that must get split to something other than at the
 76th character boundary to test the non-default behavior''')
        self.ndiffAssertEqual(str(header), hstr)
        # A generous explicit limit means no folding at all.
        header = Header(hstr, maxlinelen=1024, header_name='Subject')
        self.ndiffAssertEqual(header.encode(), hstr)
        self.ndiffAssertEqual(str(header), hstr)

    def test_quopri_splittable(self):
        x = 'xxxx ' * 20
        header = Header(charset='iso-8859-1', maxlinelen=20)
        header.append(x)
        encoded = header.encode()
        self.ndiffAssertEqual(encoded, """\
=?iso-8859-1?q?xxx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_?=""")
        self.ndiffAssertEqual(x, str(make_header(decode_header(encoded))))
        # Same payload with a wider line limit.
        header = Header(charset='iso-8859-1', maxlinelen=40)
        header.append('xxxx ' * 20)
        encoded = header.encode()
        self.ndiffAssertEqual(encoded, """\
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
 =?iso-8859-1?q?_xxxx_xxxx_?=""")
        self.ndiffAssertEqual(x, str(make_header(decode_header(encoded))))

    def test_base64_splittable(self):
        x = 'xxxx ' * 20
        header = Header(charset='koi8-r', maxlinelen=20)
        header.append(x)
        encoded = header.encode()
        self.ndiffAssertEqual(encoded, """\
=?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IA==?=""")
        self.ndiffAssertEqual(x, str(make_header(decode_header(encoded))))
        # Same payload with a wider line limit.
        header = Header(charset='koi8-r', maxlinelen=40)
        header.append(x)
        encoded = header.encode()
        self.ndiffAssertEqual(encoded, """\
=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
        self.ndiffAssertEqual(x, str(make_header(decode_header(encoded))))

    def test_us_ascii_header(self):
        text = 'hello'
        parts = decode_header(text)
        self.assertEqual(parts, [('hello', None)])
        self.assertEqual(text, make_header(parts).encode())

    def test_string_charset(self):
        # The charset may be given by name instead of as a Charset instance.
        header = Header()
        header.append('hello', 'iso-8859-1')
        self.assertEqual(header, 'hello')

##    def test_unicode_error(self):
##        raises = self.assertRaises
##        raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
##        h = Header()
##        raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')

    def test_utf8_shortest(self):
        # The expected values show q-encoding for the mostly-ASCII text and
        # b-encoding for the all-CJK text.
        header = Header('p\xf6stal', 'utf-8')
        self.assertEqual(header.encode(), '=?utf-8?q?p=C3=B6stal?=')
        header = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
        self.assertEqual(header.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')

    def test_bad_8bit_header(self):
        raw = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        # Undecodable bytes are rejected unless an error handler is given.
        self.assertRaises(UnicodeError, Header, raw)
        header = Header()
        self.assertRaises(UnicodeError, header.append, raw)
        replaced = raw.decode('utf-8', 'replace')
        self.assertEqual(str(Header(raw, errors='replace')), replaced)
        header.append(raw, errors='replace')
        self.assertEqual(str(header), replaced)

    def test_escaped_8bit_header(self):
        raw = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        escaped = raw.decode('ascii', 'surrogateescape')
        header = Header(escaped, charset=email.charset.UNKNOWN8BIT)
        self.assertEqual(
            str(header),
            'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(header),
                         [(raw, 'unknown-8bit')])

    def test_header_handles_binary_unknown8bit(self):
        raw = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        header = Header(raw, charset=email.charset.UNKNOWN8BIT)
        self.assertEqual(
            str(header),
            'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(header),
                         [(raw, 'unknown-8bit')])

    def test_make_header_handles_binary_unknown8bit(self):
        raw = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        header = Header(raw, charset=email.charset.UNKNOWN8BIT)
        rebuilt = email.header.make_header(email.header.decode_header(header))
        self.assertEqual(
            str(rebuilt),
            'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(rebuilt),
                         [(raw, 'unknown-8bit')])

    def test_modify_returned_list_does_not_change_header(self):
        header = Header('test')
        chunks = email.header.decode_header(header)
        # Mutating the returned list must not leak back into the Header.
        chunks.append(('ascii', 'test2'))
        self.assertEqual(str(header), 'test')

    def test_encoded_adjacent_nonencoded(self):
        header = Header()
        header.append('hello', 'iso-8859-1')
        header.append('world')
        encoded = header.encode()
        self.assertEqual(encoded, '=?iso-8859-1?q?hello?= world')
        # The encoded form must survive a decode/make_header round trip.
        self.assertEqual(make_header(decode_header(encoded)).encode(),
                         encoded)

    def test_whitespace_keeper(self):
        raw = ('Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= '
               '=?koi8-r?q?=CA?= zz.')
        parts = decode_header(raw)
        # The two adjacent koi8-r encoded words come back as one chunk; the
        # plain text on either side keeps its whitespace.
        self.assertEqual(parts, [
            (b'Subject: ', None),
            (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 '
             b'\xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'),
            (b' zz.', None)])
        self.assertEqual(
            make_header(parts).encode(),
            'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')

    def test_broken_base64_header(self):
        broken = ('Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzS'
                  'wd/H0SC8+LCjwLsgv7W/+Mj3I ?=')
        self.assertRaises(errors.HeaderParseError, decode_header, broken)

    def test_shift_jis_charset(self):
        # The expected value shows the header being emitted as iso-2022-jp.
        header = Header('文', charset='shift_jis')
        self.assertEqual(header.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')

    def test_flatten_header_with_no_value(self):
        # Issue 11401 (regression from email 4.x)  Note that the space after
        # the header doesn't reflect the input, but this is also the way
        # email 4.x behaved.  At some point it would be nice to fix that.
        msg = email.message_from_string("EmptyHeader:")
        self.assertEqual(str(msg), "EmptyHeader: \n\n")

    def test_encode_preserves_leading_ws_on_value(self):
        msg = Message()
        msg['SomeHeader'] = '   value with leading ws'
        # One space follows the colon, then the value's own three leading
        # spaces must survive.
        self.assertEqual(str(msg), "SomeHeader:    value with leading ws\n\n")
4736
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004737
Ezio Melottib3aedd42010-11-20 19:04:17 +00004738
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004739# Test RFC 2231 header parameters (en/de)coding
class TestRFC2231(TestEmailBase):
    # Covers RFC 2231 parameter handling on Message: get_param/set_param/
    # del_param, and get_filename/get_boundary/get_content_charset on
    # segmented and percent-encoded parameter values.

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes
    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes
    def test_get_param(self):
        msg = self._msgobj('msg_29.txt')
        self.assertEqual(
            msg.get_param('title'),
            ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        # unquote=False leaves the surrounding double quotes in place.
        self.assertEqual(
            msg.get_param('title', unquote=False),
            ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))

    def test_set_param(self):
        title = 'This is even more ***fun*** isn\'t it!'
        msg = Message()
        # Without a language the middle element of the triple is empty.
        msg.set_param('title', title, charset='us-ascii')
        self.ndiffAssertEqual(msg.get_param('title'),
                              ('us-ascii', '', title))
        msg.set_param('title', title, charset='us-ascii', language='en')
        self.ndiffAssertEqual(msg.get_param('title'),
                              ('us-ascii', 'en', title))
        msg = self._msgobj('msg_01.txt')
        msg.set_param('title', title, charset='us-ascii', language='en')
        # NOTE(review): the Received line is written with two spaces after
        # "Fri," (space-padded day), as msg_01.txt is expected to contain --
        # confirm against the fixture.
        self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri,  4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    def test_set_param_requote(self):
        msg = Message()
        msg.set_param('title', 'foo')
        self.assertEqual(msg['content-type'], 'text/plain; title="foo"')
        msg.set_param('title', 'bar', requote=False)
        self.assertEqual(msg['content-type'], 'text/plain; title=bar')
        # tspecial is still quoted.
        msg.set_param('title', "(bar)bell", requote=False)
        self.assertEqual(msg['content-type'], 'text/plain; title="(bar)bell"')

    def test_del_param(self):
        title = 'This is even more ***fun*** isn\'t it!'
        msg = self._msgobj('msg_01.txt')
        msg.set_param('foo', 'bar', charset='us-ascii', language='en')
        msg.set_param('title', title, charset='us-ascii', language='en')
        msg.del_param('foo', header='Content-Type')
        # NOTE(review): the Received line is written with two spaces after
        # "Fri," (space-padded day), as msg_01.txt is expected to contain --
        # confirm against the fixture.
        self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri,  4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_charset
    # I changed the charset name, though, because the one in the file isn't
    # a legal charset name.  Should add a test for an illegal charset.
    def test_rfc2231_get_content_charset(self):
        msg = self._msgobj('msg_32.txt')
        self.assertEqual(msg.get_content_charset(), 'us-ascii')

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_double_quotes
    def test_rfc2231_parse_rfc_quoting(self):
        text = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*=''This%20is%20even%20more%20;
            \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(text)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        # Flattening must reproduce the input exactly.
        self.assertEqual(text, msg.as_string())

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes
    def test_rfc2231_parse_extra_quoting(self):
        text = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*="''This%20is%20even%20more%20";
            \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(text)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        # Flattening must reproduce the input exactly.
        self.assertEqual(text, msg.as_string())

    # test_headerregistry.TestContentTypeHeader.rfc2231_no_language_or_charset
    # but new test uses *0* because otherwise lang/charset is not valid.
    # test_headerregistry.TestContentTypeHeader.rfc2231_segmented_normal_values
    def test_rfc2231_no_language_or_charset(self):
        text = '''\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm

'''
        msg = email.message_from_string(text)
        name = msg.get_param('NAME')
        # Unencoded segments are joined into a plain string, not a
        # (charset, language, value) triple.
        self.assertNotIsInstance(name, tuple)
        self.assertEqual(
            name,
            'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_charset
    def test_rfc2231_no_language_or_charset_in_filename(self):
        text = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(text)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    # Duplicate of previous test?
    def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
        text = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(text)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    # test_headerregistry.TestContentTypeHeader.rfc2231_partly_encoded,
    # but the test below is wrong (the first part should be decoded).
    def test_rfc2231_partly_encoded(self):
        text = '''\
Content-Disposition: inline;
\tfilename*0="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(text)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20***fun*** is it not.pdf')

    def test_rfc2231_partly_nonencoded(self):
        text = '''\
Content-Disposition: inline;
\tfilename*0="This%20is%20even%20more%20";
\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(text)
        # No segment carries the trailing '*', so nothing is percent-decoded.
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_boundary(self):
        text = '''\
Content-Type: multipart/alternative;
\tboundary*0*="''This%20is%20even%20more%20";
\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tboundary*2="is it not.pdf"

'''
        msg = email.message_from_string(text)
        self.assertEqual(msg.get_boundary(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_charset(self):
        # This is a nonsensical charset value, but tests the code anyway
        text = '''\
Content-Type: text/plain;
\tcharset*0*="This%20is%20even%20more%20";
\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tcharset*2="is it not.pdf"

'''
        msg = email.message_from_string(text)
        # The expected value is lower-cased.
        self.assertEqual(msg.get_content_charset(),
                         'this is even more ***fun*** is it not.pdf')

    # test_headerregistry.TestContentTypeHeader.rfc2231_unknown_charset_treated_as_ascii
    def test_rfc2231_bad_encoding_in_filename(self):
        text = '''\
Content-Disposition: inline;
\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(text)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_charset(self):
        text = """\
Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D

"""
        msg = email.message_from_string(text)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertEqual(msg.get_content_charset(), None)

    def test_rfc2231_bad_character_in_charset(self):
        text = """\
Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D

"""
        msg = email.message_from_string(text)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertEqual(msg.get_content_charset(), None)

    def test_rfc2231_bad_character_in_filename(self):
        text = '''\
Content-Disposition: inline;
\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2*="is it not.pdf%E2"

'''
        msg = email.message_from_string(text)
        # The undecodable %E2 byte becomes U+FFFD in the result.
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf\ufffd')

    def test_rfc2231_unknown_encoding(self):
        text = """\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt

"""
        msg = email.message_from_string(text)
        self.assertEqual(msg.get_filename(), 'myfile.txt')

    def test_rfc2231_single_tick_in_filename_extended(self):
        text = """\
Content-Type: application/x-foo;
\tname*0*=\"Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(text)
        # A lone apostrophe is not a charset'language' prefix, so both
        # come back as None.
        charset, language, value = msg.get_param('name')
        self.assertEqual(charset, None)
        self.assertEqual(language, None)
        self.assertEqual(value, "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes
    def test_rfc2231_single_tick_in_filename(self):
        text = """\
Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(text)
        name = msg.get_param('name')
        self.assertNotIsInstance(name, tuple)
        self.assertEqual(name, "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_value_with_charset_and_lang
    def test_rfc2231_tick_attack_extended(self):
        text = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(text)
        # Only the first two apostrophes delimit charset and language; the
        # third one belongs to the value.
        charset, language, value = msg.get_param('name')
        self.assertEqual(charset, 'us-ascii')
        self.assertEqual(language, 'en-us')
        self.assertEqual(value, "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_non_encoded_value
    def test_rfc2231_tick_attack(self):
        text = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(text)
        name = msg.get_param('name')
        # Segments without the trailing '*' are not parsed for a
        # charset'language' prefix.
        self.assertNotIsInstance(name, tuple)
        self.assertEqual(name, "us-ascii'en-us'Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quotes_inside_quotes
    def test_rfc2231_no_extended_values(self):
        text = """\
Content-Type: application/x-foo; name=\"Frank's Document\"

"""
        msg = email.message_from_string(text)
        self.assertEqual(msg.get_param('name'), "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_then_unencoded_segments
    def test_rfc2231_encoded_then_unencoded_segments(self):
        text = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'My\";
\tname*1=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(text)
        charset, language, value = msg.get_param('name')
        self.assertEqual(charset, 'us-ascii')
        self.assertEqual(language, 'en-us')
        self.assertEqual(value, 'My Document For You')

    # test_headerregistry.TestContentTypeHeader.rfc2231_unencoded_then_encoded_segments
    # test_headerregistry.TestContentTypeHeader.rfc2231_quoted_unencoded_then_encoded_segments
    def test_rfc2231_unencoded_then_encoded_segments(self):
        text = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'My\";
\tname*1*=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(text)
        charset, language, value = msg.get_param('name')
        self.assertEqual(charset, 'us-ascii')
        self.assertEqual(language, 'en-us')
        self.assertEqual(value, 'My Document For You')
5107
5108
Ezio Melottib3aedd42010-11-20 19:04:17 +00005109
R. David Murraya8f480f2010-01-16 18:30:03 +00005110# Tests to ensure that signed parts of an email are completely preserved, as
5111# required by RFC1847 section 2.1. Note that these are incomplete, because the
5112# email package does not currently always preserve the body. See issue 1670765.
class TestSigned(TestEmailBase):

    # Captures (group 2) the text between a "--<boundary>" line and the next
    # line that is exactly the same "--<boundary>", i.e. the first MIME part.
    # Compiled once here instead of on every comparison.
    _first_part_re = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)

    def _msg_and_obj(self, filename):
        # Return (raw text of the test file, Message parsed from that text).
        with openfile(filename) as fp:
            original = fp.read()
        return original, email.message_from_string(original)

    def _first_part(self, text, label):
        # Return the first MIME part of *text*.  Fail with a readable message
        # (rather than an AttributeError on None) when no part is found.
        match = self._first_part_re.search(text)
        self.assertIsNotNone(
            match, 'no MIME part found in the %s message' % label)
        return match.group(2)

    def _signed_parts_eq(self, original, result):
        # Extract the first mime part of each message and require that the
        # regenerated text matches the original exactly.
        inpart = self._first_part(original, 'original')
        outpart = self._first_part(result, 'regenerated')
        self.assertEqual(outpart, inpart)

    def test_long_headers_as_string(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        self._signed_parts_eq(original, msg.as_string())

    def test_long_headers_as_string_maxheaderlen(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        # A header length limit must not alter the signed part.
        self._signed_parts_eq(original, msg.as_string(maxheaderlen=60))

    def test_long_headers_flatten(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        buf = StringIO()
        Generator(buf).flatten(msg)
        self._signed_parts_eq(original, buf.getvalue())
5145
5146
Ezio Melottib3aedd42010-11-20 19:04:17 +00005147
# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()