blob: 23f062fd80ddf8fa365ac5ff00a4c128c7dd575e [file] [log] [blame]
Benjamin Peterson46a99002010-01-09 18:45:30 +00001# Copyright (C) 2001-2010 Python Software Foundation
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002# Contact: email-sig@python.org
3# email package unit tests
4
R. David Murray719a4492010-11-21 16:53:48 +00005import re
Guido van Rossum8b3febe2007-08-30 01:15:14 +00006import time
7import base64
Guido van Rossum8b3febe2007-08-30 01:15:14 +00008import unittest
R. David Murray96fd54e2010-10-08 15:55:28 +00009import textwrap
Guido van Rossum8b3febe2007-08-30 01:15:14 +000010
R. David Murray96fd54e2010-10-08 15:55:28 +000011from io import StringIO, BytesIO
Guido van Rossum8b3febe2007-08-30 01:15:14 +000012from itertools import chain
13
14import email
R David Murrayc27e5222012-05-25 15:01:48 -040015import email.policy
Guido van Rossum8b3febe2007-08-30 01:15:14 +000016
17from email.charset import Charset
18from email.header import Header, decode_header, make_header
19from email.parser import Parser, HeaderParser
R David Murray638d40b2012-08-24 11:14:13 -040020from email.generator import Generator, DecodedGenerator, BytesGenerator
Guido van Rossum8b3febe2007-08-30 01:15:14 +000021from email.message import Message
22from email.mime.application import MIMEApplication
23from email.mime.audio import MIMEAudio
24from email.mime.text import MIMEText
25from email.mime.image import MIMEImage
26from email.mime.base import MIMEBase
27from email.mime.message import MIMEMessage
28from email.mime.multipart import MIMEMultipart
29from email import utils
30from email import errors
31from email import encoders
32from email import iterators
33from email import base64mime
34from email import quoprimime
35
R David Murray965794e2013-03-07 18:16:47 -050036from test.support import unlink
R David Murraya256bac2011-03-31 12:20:23 -040037from test.test_email import openfile, TestEmailBase
Guido van Rossum8b3febe2007-08-30 01:15:14 +000038
# Shared string constants for the tests in this module; NL is used to
# re-join message text that has been split into lines.
NL = '\n'
EMPTYSTRING = ''
SPACE = ' '
42
43
Guido van Rossum8b3febe2007-08-30 01:15:14 +000044# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    """Exercise the public API of email.message.Message.

    Covers header access, charset handling, payload decoding, MIME
    parameter get/set/delete, content-type helpers, and add_header()
    encoding of non-ASCII parameter values.  Tests that call
    self._msgobj() or openfile() read fixture messages from the test
    data directory.
    """

    def test_get_all(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        # A missing header returns the supplied failobj unchanged.
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        self.assertIsNone(msg.get_charset())
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        self.assertIsNone(msg.get_charset())
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_get_charsets(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_make_boundary(self):
        msg = MIMEMultipart('form-data')
        # Note that when the boundary gets created is an implementation
        # detail and might change.
        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
        # Trigger creation of boundary
        msg.as_string()
        self.assertEqual(msg.items()[0][1][:33],
                         'multipart/form-data; boundary="==')
        # XXX: there ought to be tests of the uniqueness of the boundary, too.

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        with openfile('msg_46.txt') as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        # maxheaderlen=0 disables header re-wrapping so the round trip
        # can be compared against the input text.
        gen = Generator(out, mangle_from_=True, maxheaderlen=0)
        gen.flatten(msg, unixfrom=False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_byte_message_rfc822_only(self):
        # Make sure new bytes header parser also passes this.
        with openfile('msg_46.txt', 'rb') as fp:
            msgdata = fp.read()
        parser = email.parser.BytesHeaderParser()
        msg = parser.parsebytes(msgdata)
        out = BytesIO()
        gen = BytesGenerator(out)
        gen.flatten(msg)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        self.assertIsNone(msg.get_payload(decode=True))
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            # Item assignment on a Message appends a header rather than
            # replacing it, and lookups return the first match.  Remove any
            # previous value so that each alias is really the one in effect.
            del msg['content-transfer-encoding']
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_get_payload_n_raises_on_non_multipart(self):
        msg = Message()
        self.assertRaises(TypeError, msg.get_payload, 1)

    def test_decoded_generator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        for name in ('from', 'From', 'FROM', 'to', 'To', 'TO'):
            self.assertIn(name, msg)

    def test_as_string(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        eq(text, str(msg))
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        self.assertTrue(lines[0].startswith('From '))
        # Everything after the envelope line must match the plain rendering.
        eq(text, NL.join(lines[1:]))

    # test_headerregistry.TestContentTypeHeader.bad_params
    def test_bad_param(self):
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_filename())

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_boundary())

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        # No Content-Type header is present, so the default lookup is None.
        self.assertIsNone(msg.get_params())
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    # test_headerregistry.TestContentTypeHeader.spaces_around_param_equals
    def test_get_param_liberal(self):
        msg = Message()
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        self.assertIsNone(msg.get_param('quuz', header='x-header'))
        self.assertIsNone(msg.get_param('quuz'))
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    # test_headerregistry.TestContentTypeHeader.spaces_around_semis
    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    # test_headerregistry.TestContentTypeHeader.semis_inside_quotes
    def test_get_param_with_semis_in_quotes(self):
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    # test_headerregistry.TestContentTypeHeader.quotes_inside_rfc2231_value
    def test_get_param_with_quotes(self):
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        msg = email.message_from_string('Header: exists')
        self.assertIn('header', msg)
        self.assertIn('Header', msg)
        self.assertIn('HEADER', msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        # Re-adding the parameter puts it at the end of the list.
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_del_param_on_nonexistent_header(self):
        # Passes as long as del_param() does not raise.
        msg = Message()
        msg.del_param('filename', 'content-disposition')

    def test_del_nonexistent_param(self):
        msg = Message()
        msg.add_header('Content-Type', 'text/plain', charset='utf-8')
        existing_header = msg['Content-Type']
        msg.del_param('foobar', header='Content-Type')
        # Deleting a parameter that is not there must leave the header
        # exactly as it was.
        self.assertEqual(msg['Content-Type'], existing_header)

    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # Changing the type keeps the existing parameters.
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        # With a duplicated header only the first occurrence is replaced.
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    # test_defect_handling:test_invalid_chars_in_base64_payload
    def test_broken_base64_payload(self):
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         (b'\x03\x00\xe9\xd0\xfe\xff\xff.\x8b\xc0'
                          b'\xa1\x00p\xf6\xbf\xe9\x0f'))
        self.assertIsInstance(msg.defects[0],
                              errors.InvalidBase64CharactersDefect)

    def test_broken_unicode_payload(self):
        # This test improves coverage but is not a compliance test.
        # The behavior in this situation is currently undefined by the API.
        x = 'this is a br\xf6ken thing to do'
        msg = Message()
        msg['content-type'] = 'text/plain'
        msg['content-transfer-encoding'] = '8bit'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    def test_questionable_bytes_payload(self):
        # This test improves coverage but is not a compliance test,
        # since it involves poking inside the black box.
        x = 'this is a quéstionable thing to do'.encode('utf-8')
        msg = Message()
        msg['content-type'] = 'text/plain; charset="utf-8"'
        msg['content-transfer-encoding'] = '8bit'
        msg._payload = x
        self.assertEqual(msg.get_payload(decode=True), x)

    # Issue 1078919
    def test_ascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    def test_noascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer.ppt")
        self.assertEqual(
            'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
            msg['Content-Disposition'])

    def test_nonascii_add_header_via_triple(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fußballer.ppt'))
        self.assertEqual(
            'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
            msg['Content-Disposition'])

    def test_ascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="windows [filename].ppt")
        self.assertEqual(
            'attachment; filename="windows [filename].ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer [filename].ppt")
        self.assertEqual(
            "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
            msg['Content-Disposition'])

    def test_add_header_with_name_only_param(self):
        msg = Message()
        # Underscores in keyword names become dashes; a None value emits
        # the bare parameter name.
        msg.add_header('Content-Disposition', 'inline', foo_bar=None)
        self.assertEqual("inline; foo-bar", msg['Content-Disposition'])

    def test_add_header_with_no_value(self):
        msg = Message()
        msg.add_header('X-Status', None)
        self.assertEqual('', msg['X-Status'])

    # Issue 5871: reject an attempt to embed a header inside a header value
    # (header injection attack).
    def test_embeded_header_via_Header_rejected(self):
        msg = Message()
        msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_embeded_header_via_string_rejected(self):
        msg = Message()
        msg['Dummy'] = 'dummy\nX-Injected-Header: test'
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_unicode_header_defaults_to_utf8_encoding(self):
        # Issue 14291
        m = MIMEText('abc\n')
        m['Subject'] = 'É test'
        self.assertEqual(str(m), textwrap.dedent("""\
            Content-Type: text/plain; charset="us-ascii"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 7bit
            Subject: =?utf-8?q?=C3=89_test?=

            abc
            """))

    def test_unicode_body_defaults_to_utf8_encoding(self):
        # Issue 14291
        m = MIMEText('É testabc\n')
        self.assertEqual(str(m), textwrap.dedent("""\
            Content-Type: text/plain; charset="utf-8"
            MIME-Version: 1.0
            Content-Transfer-Encoding: base64

            w4kgdGVzdGFiYwo=
            """))
637
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000638# Test the email.encoders module
class TestEncoders(unittest.TestCase):
    """Checks on the Content-Transfer-Encoding chosen for MIME parts."""

    def test_EncodersEncode_base64(self):
        with openfile('PyBanner048.gif', 'rb') as fp:
            image_bytes = fp.read()
        encoded_body = MIMEImage(image_bytes).get_payload()
        # the transfer-encoded body lines should all be <=76 characters
        longest = max(len(line) for line in encoded_body.split('\n'))
        self.assertLessEqual(longest, 76)

    def test_encode_empty_payload(self):
        empty = Message()
        empty.set_charset('us-ascii')
        self.assertEqual(empty['content-transfer-encoding'], '7bit')

    def test_default_cte(self):
        # 7bit data and the default us-ascii _charset
        part = MIMEText('hello world')
        self.assertEqual(part['content-transfer-encoding'], '7bit')
        # Similar, but with 8bit data
        part = MIMEText('hello \xf8 world')
        self.assertEqual(part['content-transfer-encoding'], 'base64')
        # And now with a different charset
        part = MIMEText('hello \xf8 world', _charset='iso-8859-1')
        self.assertEqual(part['content-transfer-encoding'],
                         'quoted-printable')

    def test_encode7or8bit(self):
        # Make sure a charset whose input character set is 8bit but
        # whose output character set is 7bit gets a transfer-encoding
        # of 7bit.
        part = MIMEText('文', _charset='euc-jp')
        self.assertEqual(part['content-transfer-encoding'], '7bit')

    def test_qp_encode_latin1(self):
        part = MIMEText('\xe1\xf6\n', 'text', 'ISO-8859-1')
        expected = textwrap.dedent("""\
            MIME-Version: 1.0
            Content-Type: text/text; charset="iso-8859-1"
            Content-Transfer-Encoding: quoted-printable

            =E1=F6
            """)
        self.assertEqual(str(part), expected)

    def test_qp_encode_non_latin1(self):
        # Issue 16948
        part = MIMEText('\u017c\n', 'text', 'ISO-8859-2')
        expected = textwrap.dedent("""\
            MIME-Version: 1.0
            Content-Type: text/text; charset="iso-8859-2"
            Content-Transfer-Encoding: quoted-printable

            =BF
            """)
        self.assertEqual(str(part), expected)
Ezio Melottib3aedd42010-11-20 19:04:17 +0000697
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000698# Test long header wrapping
699class TestLongHeaders(TestEmailBase):
R David Murray01581ee2011-04-18 10:04:34 -0400700
701 maxDiff = None
702
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000703 def test_split_long_continuation(self):
704 eq = self.ndiffAssertEqual
705 msg = email.message_from_string("""\
706Subject: bug demonstration
707\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
708\tmore text
709
710test
711""")
712 sfp = StringIO()
713 g = Generator(sfp)
714 g.flatten(msg)
715 eq(sfp.getvalue(), """\
716Subject: bug demonstration
717\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
718\tmore text
719
720test
721""")
722
723 def test_another_long_almost_unsplittable_header(self):
724 eq = self.ndiffAssertEqual
725 hstr = """\
726bug demonstration
727\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
728\tmore text"""
729 h = Header(hstr, continuation_ws='\t')
730 eq(h.encode(), """\
731bug demonstration
732\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
733\tmore text""")
734 h = Header(hstr.replace('\t', ' '))
735 eq(h.encode(), """\
736bug demonstration
737 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
738 more text""")
739
740 def test_long_nonstring(self):
741 eq = self.ndiffAssertEqual
742 g = Charset("iso-8859-1")
743 cz = Charset("iso-8859-2")
744 utf8 = Charset("utf-8")
745 g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
746 b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
747 b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
748 b'bef\xf6rdert. ')
749 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
750 b'd\xf9vtipu.. ')
751 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
752 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
753 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
754 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
755 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
756 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
757 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
758 '\u3044\u307e\u3059\u3002')
759 h = Header(g_head, g, header_name='Subject')
760 h.append(cz_head, cz)
761 h.append(utf8_head, utf8)
762 msg = Message()
763 msg['Subject'] = h
764 sfp = StringIO()
765 g = Generator(sfp)
766 g.flatten(msg)
767 eq(sfp.getvalue(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000768Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
769 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
770 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
771 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
772 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
773 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
774 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
775 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
776 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
777 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
778 =?utf-8?b?44CC?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000779
780""")
Guido van Rossum9604e662007-08-30 03:46:43 +0000781 eq(h.encode(maxlinelen=76), """\
782=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
783 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
784 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
785 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
786 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
787 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
788 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
789 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
790 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
791 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
792 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000793
794 def test_long_header_encode(self):
795 eq = self.ndiffAssertEqual
796 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
797 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
798 header_name='X-Foobar-Spoink-Defrobnit')
799 eq(h.encode(), '''\
800wasnipoop; giraffes="very-long-necked-animals";
801 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
802
803 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
804 eq = self.ndiffAssertEqual
805 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
806 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
807 header_name='X-Foobar-Spoink-Defrobnit',
808 continuation_ws='\t')
809 eq(h.encode(), '''\
810wasnipoop; giraffes="very-long-necked-animals";
811 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
812
813 def test_long_header_encode_with_tab_continuation(self):
814 eq = self.ndiffAssertEqual
815 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
816 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
817 header_name='X-Foobar-Spoink-Defrobnit',
818 continuation_ws='\t')
819 eq(h.encode(), '''\
820wasnipoop; giraffes="very-long-necked-animals";
821\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
822
R David Murray3a6152f2011-03-14 21:13:03 -0400823 def test_header_encode_with_different_output_charset(self):
824 h = Header('文', 'euc-jp')
825 self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=")
826
827 def test_long_header_encode_with_different_output_charset(self):
828 h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4'
829 b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4'
830 b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4'
831 b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp')
832 res = """\
833=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?=
834 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?="""
835 self.assertEqual(h.encode(), res)
836
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000837 def test_header_splitter(self):
838 eq = self.ndiffAssertEqual
839 msg = MIMEText('')
840 # It'd be great if we could use add_header() here, but that doesn't
841 # guarantee an order of the parameters.
842 msg['X-Foobar-Spoink-Defrobnit'] = (
843 'wasnipoop; giraffes="very-long-necked-animals"; '
844 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
845 sfp = StringIO()
846 g = Generator(sfp)
847 g.flatten(msg)
848 eq(sfp.getvalue(), '''\
849Content-Type: text/plain; charset="us-ascii"
850MIME-Version: 1.0
851Content-Transfer-Encoding: 7bit
852X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
853 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
854
855''')
856
857 def test_no_semis_header_splitter(self):
858 eq = self.ndiffAssertEqual
859 msg = Message()
860 msg['From'] = 'test@dom.ain'
861 msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
862 msg.set_payload('Test')
863 sfp = StringIO()
864 g = Generator(sfp)
865 g.flatten(msg)
866 eq(sfp.getvalue(), """\
867From: test@dom.ain
868References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
869 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>
870
871Test""")
872
R David Murray7da4db12011-04-07 20:37:17 -0400873 def test_last_split_chunk_does_not_fit(self):
874 eq = self.ndiffAssertEqual
875 h = Header('Subject: the first part of this is short, but_the_second'
876 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
877 '_all_by_itself')
878 eq(h.encode(), """\
879Subject: the first part of this is short,
880 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
881
882 def test_splittable_leading_char_followed_by_overlong_unsplitable(self):
883 eq = self.ndiffAssertEqual
884 h = Header(', but_the_second'
885 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
886 '_all_by_itself')
887 eq(h.encode(), """\
888,
889 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
890
891 def test_multiple_splittable_leading_char_followed_by_overlong_unsplitable(self):
892 eq = self.ndiffAssertEqual
893 h = Header(', , but_the_second'
894 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
895 '_all_by_itself')
896 eq(h.encode(), """\
897, ,
898 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
899
900 def test_trailing_splitable_on_overlong_unsplitable(self):
901 eq = self.ndiffAssertEqual
902 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
903 'be_on_a_line_all_by_itself;')
904 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_should_"
905 "be_on_a_line_all_by_itself;")
906
907 def test_trailing_splitable_on_overlong_unsplitable_with_leading_splitable(self):
908 eq = self.ndiffAssertEqual
909 h = Header('; '
910 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
R David Murray01581ee2011-04-18 10:04:34 -0400911 'be_on_a_line_all_by_itself; ')
R David Murray7da4db12011-04-07 20:37:17 -0400912 eq(h.encode(), """\
913;
R David Murray01581ee2011-04-18 10:04:34 -0400914 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
R David Murray7da4db12011-04-07 20:37:17 -0400915
R David Murraye1292a22011-04-07 20:54:03 -0400916 def test_long_header_with_multiple_sequential_split_chars(self):
R David Murraye1292a22011-04-07 20:54:03 -0400917 eq = self.ndiffAssertEqual
918 h = Header('This is a long line that has two whitespaces in a row. '
919 'This used to cause truncation of the header when folded')
920 eq(h.encode(), """\
921This is a long line that has two whitespaces in a row. This used to cause
922 truncation of the header when folded""")
923
R David Murray01581ee2011-04-18 10:04:34 -0400924 def test_splitter_split_on_punctuation_only_if_fws(self):
925 eq = self.ndiffAssertEqual
926 h = Header('thisverylongheaderhas;semicolons;and,commas,but'
927 'they;arenotlegal;fold,points')
928 eq(h.encode(), "thisverylongheaderhas;semicolons;and,commas,butthey;"
929 "arenotlegal;fold,points")
930
931 def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self):
932 eq = self.ndiffAssertEqual
933 h = Header('this is a test where we need to have more than one line '
934 'before; our final line that is just too big to fit;; '
935 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
936 'be_on_a_line_all_by_itself;')
937 eq(h.encode(), """\
938this is a test where we need to have more than one line before;
939 our final line that is just too big to fit;;
940 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""")
941
942 def test_overlong_last_part_followed_by_split_point(self):
943 eq = self.ndiffAssertEqual
944 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
945 'be_on_a_line_all_by_itself ')
946 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_"
947 "should_be_on_a_line_all_by_itself ")
948
949 def test_multiline_with_overlong_parts_separated_by_two_split_points(self):
950 eq = self.ndiffAssertEqual
951 h = Header('this_is_a__test_where_we_need_to_have_more_than_one_line_'
952 'before_our_final_line_; ; '
953 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
954 'be_on_a_line_all_by_itself; ')
955 eq(h.encode(), """\
956this_is_a__test_where_we_need_to_have_more_than_one_line_before_our_final_line_;
957 ;
958 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
959
960 def test_multiline_with_overlong_last_part_followed_by_split_point(self):
961 eq = self.ndiffAssertEqual
962 h = Header('this is a test where we need to have more than one line '
963 'before our final line; ; '
964 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
965 'be_on_a_line_all_by_itself; ')
966 eq(h.encode(), """\
967this is a test where we need to have more than one line before our final line;
968 ;
969 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
970
971 def test_long_header_with_whitespace_runs(self):
972 eq = self.ndiffAssertEqual
973 msg = Message()
974 msg['From'] = 'test@dom.ain'
975 msg['References'] = SPACE.join(['<foo@dom.ain> '] * 10)
976 msg.set_payload('Test')
977 sfp = StringIO()
978 g = Generator(sfp)
979 g.flatten(msg)
980 eq(sfp.getvalue(), """\
981From: test@dom.ain
982References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
983 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
984 <foo@dom.ain> <foo@dom.ain>\x20\x20
985
986Test""")
987
988 def test_long_run_with_semi_header_splitter(self):
989 eq = self.ndiffAssertEqual
990 msg = Message()
991 msg['From'] = 'test@dom.ain'
992 msg['References'] = SPACE.join(['<foo@dom.ain>'] * 10) + '; abc'
993 msg.set_payload('Test')
994 sfp = StringIO()
995 g = Generator(sfp)
996 g.flatten(msg)
997 eq(sfp.getvalue(), """\
998From: test@dom.ain
999References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
1000 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
1001 <foo@dom.ain>; abc
1002
1003Test""")
1004
1005 def test_splitter_split_on_punctuation_only_if_fws(self):
1006 eq = self.ndiffAssertEqual
1007 msg = Message()
1008 msg['From'] = 'test@dom.ain'
1009 msg['References'] = ('thisverylongheaderhas;semicolons;and,commas,but'
1010 'they;arenotlegal;fold,points')
1011 msg.set_payload('Test')
1012 sfp = StringIO()
1013 g = Generator(sfp)
1014 g.flatten(msg)
1015 # XXX the space after the header should not be there.
1016 eq(sfp.getvalue(), """\
1017From: test@dom.ain
1018References:\x20
1019 thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points
1020
1021Test""")
1022
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001023 def test_no_split_long_header(self):
1024 eq = self.ndiffAssertEqual
1025 hstr = 'References: ' + 'x' * 80
Guido van Rossum9604e662007-08-30 03:46:43 +00001026 h = Header(hstr)
1027 # These come on two lines because Headers are really field value
1028 # classes and don't really know about their field names.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001029 eq(h.encode(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +00001030References:
1031 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
1032 h = Header('x' * 80)
1033 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001034
1035 def test_splitting_multiple_long_lines(self):
1036 eq = self.ndiffAssertEqual
1037 hstr = """\
1038from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1039\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1040\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1041"""
1042 h = Header(hstr, continuation_ws='\t')
1043 eq(h.encode(), """\
1044from babylon.socal-raves.org (localhost [127.0.0.1]);
1045 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1046 for <mailman-admin@babylon.socal-raves.org>;
1047 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1048\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1049 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1050 for <mailman-admin@babylon.socal-raves.org>;
1051 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1052\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1053 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1054 for <mailman-admin@babylon.socal-raves.org>;
1055 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")
1056
1057 def test_splitting_first_line_only_is_long(self):
1058 eq = self.ndiffAssertEqual
1059 hstr = """\
1060from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
1061\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1062\tid 17k4h5-00034i-00
1063\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
1064 h = Header(hstr, maxlinelen=78, header_name='Received',
1065 continuation_ws='\t')
1066 eq(h.encode(), """\
1067from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
1068 helo=cthulhu.gerg.ca)
1069\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1070\tid 17k4h5-00034i-00
1071\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")
1072
1073 def test_long_8bit_header(self):
1074 eq = self.ndiffAssertEqual
1075 msg = Message()
1076 h = Header('Britische Regierung gibt', 'iso-8859-1',
1077 header_name='Subject')
1078 h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
Guido van Rossum9604e662007-08-30 03:46:43 +00001079 eq(h.encode(maxlinelen=76), """\
1080=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1081 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001082 msg['Subject'] = h
Guido van Rossum9604e662007-08-30 03:46:43 +00001083 eq(msg.as_string(maxheaderlen=76), """\
1084Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1085 =?iso-8859-1?q?hore-Windkraftprojekte?=
1086
1087""")
1088 eq(msg.as_string(maxheaderlen=0), """\
1089Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001090
1091""")
1092
1093 def test_long_8bit_header_no_charset(self):
1094 eq = self.ndiffAssertEqual
1095 msg = Message()
Barry Warsaw8c571042007-08-30 19:17:18 +00001096 header_string = ('Britische Regierung gibt gr\xfcnes Licht '
1097 'f\xfcr Offshore-Windkraftprojekte '
1098 '<a-very-long-address@example.com>')
1099 msg['Reply-To'] = header_string
R David Murray7441a7a2012-03-14 02:59:51 -04001100 eq(msg.as_string(maxheaderlen=78), """\
1101Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1102 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
1103
1104""")
Barry Warsaw8c571042007-08-30 19:17:18 +00001105 msg = Message()
R David Murray7441a7a2012-03-14 02:59:51 -04001106 msg['Reply-To'] = Header(header_string,
Barry Warsaw8c571042007-08-30 19:17:18 +00001107 header_name='Reply-To')
1108 eq(msg.as_string(maxheaderlen=78), """\
1109Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1110 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001111
1112""")
1113
1114 def test_long_to_header(self):
1115 eq = self.ndiffAssertEqual
1116 to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
R David Murray01581ee2011-04-18 10:04:34 -04001117 '<someone@eecs.umich.edu>, '
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001118 '"Someone Test #B" <someone@umich.edu>, '
1119 '"Someone Test #C" <someone@eecs.umich.edu>, '
1120 '"Someone Test #D" <someone@eecs.umich.edu>')
1121 msg = Message()
1122 msg['To'] = to
1123 eq(msg.as_string(maxheaderlen=78), '''\
Guido van Rossum9604e662007-08-30 03:46:43 +00001124To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
Barry Warsaw70d61ce2009-03-30 23:12:30 +00001125 "Someone Test #B" <someone@umich.edu>,
Guido van Rossum9604e662007-08-30 03:46:43 +00001126 "Someone Test #C" <someone@eecs.umich.edu>,
1127 "Someone Test #D" <someone@eecs.umich.edu>
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001128
1129''')
1130
1131 def test_long_line_after_append(self):
1132 eq = self.ndiffAssertEqual
1133 s = 'This is an example of string which has almost the limit of header length.'
1134 h = Header(s)
1135 h.append('Add another line.')
Guido van Rossum9604e662007-08-30 03:46:43 +00001136 eq(h.encode(maxlinelen=76), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001137This is an example of string which has almost the limit of header length.
1138 Add another line.""")
1139
1140 def test_shorter_line_with_append(self):
1141 eq = self.ndiffAssertEqual
1142 s = 'This is a shorter line.'
1143 h = Header(s)
1144 h.append('Add another sentence. (Surprise?)')
1145 eq(h.encode(),
1146 'This is a shorter line. Add another sentence. (Surprise?)')
1147
1148 def test_long_field_name(self):
1149 eq = self.ndiffAssertEqual
1150 fn = 'X-Very-Very-Very-Long-Header-Name'
Guido van Rossum9604e662007-08-30 03:46:43 +00001151 gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
1152 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
1153 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
1154 'bef\xf6rdert. ')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001155 h = Header(gs, 'iso-8859-1', header_name=fn)
1156 # BAW: this seems broken because the first line is too long
Guido van Rossum9604e662007-08-30 03:46:43 +00001157 eq(h.encode(maxlinelen=76), """\
1158=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
1159 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
1160 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
1161 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001162
1163 def test_long_received_header(self):
1164 h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
1165 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
1166 'Wed, 05 Mar 2003 18:10:18 -0700')
1167 msg = Message()
1168 msg['Received-1'] = Header(h, continuation_ws='\t')
1169 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +00001170 # This should be splitting on spaces not semicolons.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001171 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001172Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1173 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
Barry Warsawbef9d212007-08-31 10:55:37 +00001174 Wed, 05 Mar 2003 18:10:18 -0700
R David Murray01581ee2011-04-18 10:04:34 -04001175Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1176 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
Barry Warsawbef9d212007-08-31 10:55:37 +00001177 Wed, 05 Mar 2003 18:10:18 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001178
1179""")
1180
1181 def test_string_headerinst_eq(self):
1182 h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
1183 'tu-muenchen.de> (David Bremner\'s message of '
1184 '"Thu, 6 Mar 2003 13:58:21 +0100")')
1185 msg = Message()
1186 msg['Received-1'] = Header(h, header_name='Received-1',
1187 continuation_ws='\t')
1188 msg['Received-2'] = h
R David Murray01581ee2011-04-18 10:04:34 -04001189 # XXX The space after the ':' should not be there.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001190 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001191Received-1:\x20
1192 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1193 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
1194Received-2:\x20
1195 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1196 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001197
1198""")
1199
1200 def test_long_unbreakable_lines_with_continuation(self):
1201 eq = self.ndiffAssertEqual
1202 msg = Message()
1203 t = """\
1204iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1205 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
1206 msg['Face-1'] = t
1207 msg['Face-2'] = Header(t, header_name='Face-2')
R David Murray01581ee2011-04-18 10:04:34 -04001208 msg['Face-3'] = ' ' + t
Barry Warsawbef9d212007-08-31 10:55:37 +00001209 # XXX This splitting is all wrong. It the first value line should be
R David Murray01581ee2011-04-18 10:04:34 -04001210 # snug against the field name or the space after the header not there.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001211 eq(msg.as_string(maxheaderlen=78), """\
Barry Warsawc5a6a302007-08-31 11:19:21 +00001212Face-1:\x20
Barry Warsaw70d61ce2009-03-30 23:12:30 +00001213 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001214 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Barry Warsawc5a6a302007-08-31 11:19:21 +00001215Face-2:\x20
Barry Warsawbef9d212007-08-31 10:55:37 +00001216 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001217 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
R David Murray01581ee2011-04-18 10:04:34 -04001218Face-3:\x20
1219 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1220 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001221
1222""")
1223
1224 def test_another_long_multiline_header(self):
1225 eq = self.ndiffAssertEqual
1226 m = ('Received: from siimage.com '
1227 '([172.25.1.3]) by zima.siliconimage.com with '
Guido van Rossum9604e662007-08-30 03:46:43 +00001228 'Microsoft SMTPSVC(5.0.2195.4905); '
1229 'Wed, 16 Oct 2002 07:41:11 -0700')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001230 msg = email.message_from_string(m)
1231 eq(msg.as_string(maxheaderlen=78), '''\
R David Murray01581ee2011-04-18 10:04:34 -04001232Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with
1233 Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 2002 07:41:11 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001234
1235''')
1236
1237 def test_long_lines_with_different_header(self):
1238 eq = self.ndiffAssertEqual
1239 h = ('List-Unsubscribe: '
1240 '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
1241 ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
1242 '?subject=unsubscribe>')
1243 msg = Message()
1244 msg['List'] = h
1245 msg['List'] = Header(h, header_name='List')
1246 eq(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001247List: List-Unsubscribe:
1248 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +00001249 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
R David Murray01581ee2011-04-18 10:04:34 -04001250List: List-Unsubscribe:
1251 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +00001252 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001253
1254""")
1255
R. David Murray6f0022d2011-01-07 21:57:25 +00001256 def test_long_rfc2047_header_with_embedded_fws(self):
1257 h = Header(textwrap.dedent("""\
1258 We're going to pretend this header is in a non-ascii character set
1259 \tto see if line wrapping with encoded words and embedded
1260 folding white space works"""),
1261 charset='utf-8',
1262 header_name='Test')
1263 self.assertEqual(h.encode()+'\n', textwrap.dedent("""\
1264 =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
1265 =?utf-8?q?cter_set?=
1266 =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
1267 =?utf-8?q?_folding_white_space_works?=""")+'\n')
1268
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001269
Ezio Melottib3aedd42010-11-20 19:04:17 +00001270
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001271# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    def setUp(self):
        # Every test starts from a message whose body opens with a line
        # beginning "From ".
        msg = Message()
        msg['From'] = 'aaa@bbb.org'
        msg.set_payload("""\
From the desk of A.A.A.:
Blah blah blah
""")
        self.msg = msg

    def _flattened(self, msg, mangle):
        # Serialize msg with a text Generator, passing mangle through as
        # mangle_from_, and return the result as a str.
        out = StringIO()
        Generator(out, mangle_from_=mangle).flatten(msg)
        return out.getvalue()

    def test_mangled_from(self):
        # With mangling on, the body's "From " line gains a '>' prefix.
        self.assertEqual(self._flattened(self.msg, True), """\
From: aaa@bbb.org

>From the desk of A.A.A.:
Blah blah blah
""")

    def test_dont_mangle_from(self):
        # With mangling off, the body is written exactly as given.
        self.assertEqual(self._flattened(self.msg, False), """\
From: aaa@bbb.org

From the desk of A.A.A.:
Blah blah blah
""")

    def test_mangle_from_in_preamble_and_epilog(self):
        # The multipart below has one "From " line before the first boundary
        # and one after the closing boundary; both are expected to be
        # mangled.
        msg = email.message_from_string(textwrap.dedent("""\
            From: foo@bar.com
            Mime-Version: 1.0
            Content-Type: multipart/mixed; boundary=XXX

            From somewhere unknown

            --XXX
            Content-Type: text/plain

            foo

            --XXX--

            From somewhere unknowable
            """))
        text = self._flattened(msg, True)
        mangled = sum(line.startswith('>From ') for line in text.split('\n'))
        self.assertEqual(mangled, 2)

    def test_mangled_from_with_bad_bytes(self):
        # Bytes variant: the "From " body line contains non-ASCII bytes and
        # is written with BytesGenerator.
        source = textwrap.dedent("""\
            Content-Type: text/plain; charset="utf-8"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 8bit
            From: aaa@bbb.org

            """).encode('utf-8')
        msg = email.message_from_bytes(source + b'From R\xc3\xb6lli\n')
        out = BytesIO()
        BytesGenerator(out, mangle_from_=True).flatten(msg)
        self.assertEqual(out.getvalue(), source + b'>From R\xc3\xb6lli\n')
1339
Ezio Melottib3aedd42010-11-20 19:04:17 +00001340
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001341# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    def setUp(self):
        # Load the sample file once per test and wrap it in a MIMEAudio.
        with openfile('audiotest.au', 'rb') as fp:
            self._audiodata = fp.read()
        self._au = MIMEAudio(self._audiodata)

    def test_guess_minor_type(self):
        # No subtype was passed to MIMEAudio, so it has to be guessed.
        self.assertEqual(self._au.get_content_type(), 'audio/basic')

    def test_encoding(self):
        # Decoding the payload as base64 must give back the original bytes.
        encoded = self._au.get_payload().encode('ascii')
        self.assertEqual(base64.decodebytes(encoded), self._audiodata)

    def test_checkSetMinor(self):
        # An explicitly passed subtype is used as given.
        audio = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(audio.get_content_type(), 'audio/fish')

    def test_add_header(self):
        au = self._au
        au.add_header('Content-Disposition', 'attachment',
                      filename='audiotest.au')
        self.assertEqual(au['content-disposition'],
                         'attachment; filename="audiotest.au"')
        self.assertEqual(au.get_params(header='content-disposition'),
                         [('attachment', ''), ('filename', 'audiotest.au')])
        self.assertEqual(
            au.get_param('filename', header='content-disposition'),
            'audiotest.au')
        # A fresh list serves as a sentinel that can only compare identical
        # to itself.
        missing = []
        self.assertEqual(
            au.get_param('attachment', header='content-disposition'), '')
        self.assertIs(
            au.get_param('foo', failobj=missing,
                         header='content-disposition'),
            missing)
        # Try some missing stuff
        self.assertIs(au.get_param('foobar', missing), missing)
        self.assertIs(
            au.get_param('attachment', missing, header='foobar'), missing)
1379
1380
Ezio Melottib3aedd42010-11-20 19:04:17 +00001381
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001382# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    def setUp(self):
        # Load the sample file once per test and wrap it in a MIMEImage.
        with openfile('PyBanner048.gif', 'rb') as fp:
            self._imgdata = fp.read()
        self._im = MIMEImage(self._imgdata)

    def test_guess_minor_type(self):
        # No subtype was passed to MIMEImage, so it has to be guessed.
        self.assertEqual(self._im.get_content_type(), 'image/gif')

    def test_encoding(self):
        # Decoding the payload as base64 must give back the original bytes.
        encoded = self._im.get_payload().encode('ascii')
        self.assertEqual(base64.decodebytes(encoded), self._imgdata)

    def test_checkSetMinor(self):
        # An explicitly passed subtype is used as given.
        image = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(image.get_content_type(), 'image/fish')

    def test_add_header(self):
        im = self._im
        im.add_header('Content-Disposition', 'attachment',
                      filename='dingusfish.gif')
        self.assertEqual(im['content-disposition'],
                         'attachment; filename="dingusfish.gif"')
        self.assertEqual(im.get_params(header='content-disposition'),
                         [('attachment', ''), ('filename', 'dingusfish.gif')])
        self.assertEqual(
            im.get_param('filename', header='content-disposition'),
            'dingusfish.gif')
        # A fresh list serves as a sentinel that can only compare identical
        # to itself.
        missing = []
        self.assertEqual(
            im.get_param('attachment', header='content-disposition'), '')
        self.assertIs(
            im.get_param('foo', failobj=missing,
                         header='content-disposition'),
            missing)
        # Try some missing stuff
        self.assertIs(im.get_param('foobar', missing), missing)
        self.assertIs(
            im.get_param('attachment', missing, header='foobar'), missing)
1420
1421
Ezio Melottib3aedd42010-11-20 19:04:17 +00001422
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001423# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    """Exercise MIMEApplication with the default and alternative encoders."""

    def test_headers(self):
        # Defaults: octet-stream subtype and base64 transfer encoding.
        msg = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        self.assertEqual(msg.get_content_type(), 'application/octet-stream')
        self.assertEqual(msg['content-transfer-encoding'], 'base64')

    def test_body(self):
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata)
        # whitespace in the cte encoded block is RFC-irrelevant.
        self.assertEqual(msg.get_payload().strip(), '+vv8/f7/')
        self.assertEqual(msg.get_payload(decode=True), bytesdata)

    def test_binary_body_with_encode_7or8bit(self):
        # Issue 17171.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        replaced = '\uFFFD' * len(bytesdata)
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_7or8bit)
        # Treated as a string, this will be invalid code points.
        self.assertEqual(msg.get_payload(), replaced)
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        self.assertEqual(msg['Content-Transfer-Encoding'], '8bit')
        s = BytesIO()
        g = BytesGenerator(s)
        g.flatten(msg)
        wireform = s.getvalue()
        msg2 = email.message_from_bytes(wireform)
        # Flattening must leave the source message's payload unchanged...
        self.assertEqual(msg.get_payload(), replaced)
        # ...and the re-parsed message must present the same views of it.
        self.assertEqual(msg2.get_payload(), replaced)
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
        self.assertEqual(msg2['Content-Transfer-Encoding'], '8bit')

    def test_binary_body_with_encode_noop(self):
        # Issue 16564: This does not produce an RFC valid message, since to be
        # valid it should have a CTE of binary.  But the below works in
        # Python2, and is documented as working this way.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        replaced = '\uFFFD' * len(bytesdata)
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_noop)
        # Treated as a string, this will be invalid code points.
        self.assertEqual(msg.get_payload(), replaced)
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        s = BytesIO()
        g = BytesGenerator(s)
        g.flatten(msg)
        wireform = s.getvalue()
        msg2 = email.message_from_bytes(wireform)
        # Flattening must leave the source message's payload unchanged...
        self.assertEqual(msg.get_payload(), replaced)
        # ...and the re-parsed message must present the same views of it.
        self.assertEqual(msg2.get_payload(), replaced)
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001472
Ezio Melottib3aedd42010-11-20 19:04:17 +00001473
# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    """Exercise MIMEText construction with and without an explicit charset."""

    def setUp(self):
        self._msg = MIMEText('hello there')

    def test_types(self):
        msg = self._msg
        self.assertEqual(msg.get_content_type(), 'text/plain')
        self.assertEqual(msg.get_param('charset'), 'us-ascii')
        # A fresh list is a unique sentinel for the failobj checks.
        missing = []
        self.assertIs(msg.get_param('foobar', missing), missing)
        self.assertIs(
            msg.get_param('charset', missing, header='foobar'), missing)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        msg = MIMEText('hello there', _charset='us-ascii')
        self.assertEqual(msg.get_charset().input_charset, 'us-ascii')
        self.assertEqual(msg['content-type'],
                         'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        msg = MIMEText('hello there', _charset='us-ascii')
        self.assertEqual(msg.get_charset().input_charset, 'us-ascii')
        self.assertEqual(msg['content-type'],
                         'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        msg = MIMEText('hello there')
        self.assertEqual(msg.get_charset(), 'us-ascii')
        self.assertEqual(msg['content-type'],
                         'text/plain; charset="us-ascii"')
        self.assertIn('hello there', msg.as_string())

    def test_utf8_input(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        msg = MIMEText(teststr, _charset='utf-8')
        self.assertEqual(msg.get_charset().output_charset, 'utf-8')
        self.assertEqual(msg['content-type'], 'text/plain; charset="utf-8"')
        self.assertEqual(msg.get_payload(decode=True),
                         teststr.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
        "will fix in email6")
    def test_utf8_input_no_charset(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
1525
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001526
Ezio Melottib3aedd42010-11-20 19:04:17 +00001527
# Test complicated multipart/* messages
class TestMultipart(TestEmailBase):
    """Build, flatten and parse multipart/* messages.

    setUp() assembles a two-part multipart/mixed container (a text intro
    plus a GIF attachment).  The remaining tests either inspect that
    container, flatten hand-built containers and compare the exact text, or
    parse fixture messages with unusual boundary usage.
    """

    def setUp(self):
        with openfile('PyBanner048.gif', 'rb') as fp:
            data = fp.read()
        container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
        image = MIMEImage(data, name='dingusfish.gif')
        image.add_header('content-disposition', 'attachment',
                         filename='dingusfish.gif')
        intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
        container.attach(intro)
        container.attach(image)
        container['From'] = 'Barry <barry@digicool.com>'
        container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
        container['Subject'] = 'Here is your dingus fish'

        now = 987809702.54848599
        timetuple = time.localtime(now)
        # The last struct_time item is the DST flag; pick the matching
        # "seconds west of UTC" value.
        tzsecs = time.timezone if timetuple[-1] == 0 else time.altzone
        # West of UTC is written with '-', everything else with '+'.
        sign = '-' if tzsecs > 0 else '+'
        # Format the magnitude as HHMM.  Working on abs() keeps a stray
        # minus sign out of the digits for zones east of UTC, and divmod
        # handles offsets that are not a whole number of hours.
        hours, minutes = divmod(abs(tzsecs) // 60, 60)
        tzoffset = ' %s%02d%02d' % (sign, hours, minutes)
        container['Date'] = time.strftime(
            '%a, %d %b %Y %H:%M:%S', timetuple) + tzoffset
        self._msg = container
        self._im = image
        self._txt = intro

    def _new_outer(self):
        # Shared starting point for the flattening tests below.
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        return outer

    def test_hierarchy(self):
        m = self._msg
        self.assertTrue(m.is_multipart())
        self.assertEqual(m.get_content_type(), 'multipart/mixed')
        self.assertEqual(len(m.get_payload()), 2)
        self.assertRaises(IndexError, m.get_payload, 2)
        m0 = m.get_payload(0)
        m1 = m.get_payload(1)
        # The container must hand back the very objects that were attached.
        self.assertIs(m0, self._txt)
        self.assertIs(m1, self._im)
        self.assertEqual(m.get_payload(), [m0, m1])
        self.assertFalse(m0.is_multipart())
        self.assertFalse(m1.is_multipart())

    def test_empty_multipart_idempotent(self):
        text = """\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY


--BOUNDARY--
"""
        msg = Parser().parsestr(text)
        self.ndiffAssertEqual(text, msg.as_string())

    def test_no_parts_in_a_multipart_with_none_epilogue(self):
        outer = self._new_outer()
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY

--BOUNDARY--''')

    def test_no_parts_in_a_multipart_with_empty_epilogue(self):
        outer = self._new_outer()
        outer.preamble = ''
        outer.epilogue = ''
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY

--BOUNDARY--
''')

    def test_one_part_in_a_multipart(self):
        outer = self._new_outer()
        outer.set_boundary('BOUNDARY')
        outer.attach(MIMEText('hello world'))
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_preamble(self):
        outer = self._new_outer()
        outer.preamble = ''
        outer.attach(MIMEText('hello world'))
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_preamble(self):
        outer = self._new_outer()
        outer.preamble = None
        outer.attach(MIMEText('hello world'))
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_epilogue(self):
        outer = self._new_outer()
        outer.epilogue = None
        outer.attach(MIMEText('hello world'))
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
        outer = self._new_outer()
        outer.epilogue = ''
        outer.attach(MIMEText('hello world'))
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
        outer = self._new_outer()
        outer.epilogue = '\n'
        outer.attach(MIMEText('hello world'))
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--

''')

    def test_message_external_body(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_36.txt')
        eq(len(msg.get_payload()), 2)
        msg1 = msg.get_payload(1)
        eq(msg1.get_content_type(), 'multipart/alternative')
        eq(len(msg1.get_payload()), 2)
        for subpart in msg1.get_payload():
            eq(subpart.get_content_type(), 'message/external-body')
            eq(len(subpart.get_payload()), 1)
            subsubpart = subpart.get_payload(0)
            eq(subsubpart.get_content_type(), 'text/plain')

    def test_double_boundary(self):
        # msg_37.txt is a multipart that contains two dash-boundary's in a
        # row.  Our interpretation of RFC 2046 calls for ignoring the second
        # and subsequent boundaries.
        msg = self._msgobj('msg_37.txt')
        self.assertEqual(len(msg.get_payload()), 3)

    def test_nested_inner_contains_outer_boundary(self):
        # msg_38.txt has an inner part that contains outer boundaries.  My
        # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
        # these are illegal and should be interpreted as unterminated inner
        # parts.
        msg = self._msgobj('msg_38.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): the nesting indentation of this expected dump was
        # reconstructed (4 spaces per level) -- confirm against the actual
        # structure of msg_38.txt.
        self.ndiffAssertEqual(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
            text/plain
        text/plain
    text/plain
    text/plain
""")

    def test_nested_with_same_boundary(self):
        # msg 39.txt is similarly evil in that it's got inner parts that use
        # the same boundary as outer parts.  Again, I believe the way this is
        # parsed is closest to the spirit of RFC 2046
        msg = self._msgobj('msg_39.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): the nesting indentation of this expected dump was
        # reconstructed (4 spaces per level) -- confirm against the actual
        # structure of msg_39.txt.
        self.ndiffAssertEqual(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
        application/octet-stream
        application/octet-stream
    text/plain
""")

    def test_boundary_in_non_multipart(self):
        msg = self._msgobj('msg_40.txt')
        self.assertEqual(msg.as_string(), '''\
MIME-Version: 1.0
Content-Type: text/html; boundary="--961284236552522269"

----961284236552522269
Content-Type: text/html;
Content-Transfer-Encoding: 7Bit

<html></html>

----961284236552522269--
''')

    def test_boundary_with_leading_space(self):
        msg = email.message_from_string('''\
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary=" XXXX"

-- XXXX
Content-Type: text/plain


-- XXXX
Content-Type: text/plain

-- XXXX--
''')
        self.assertTrue(msg.is_multipart())
        self.assertEqual(msg.get_boundary(), ' XXXX')
        self.assertEqual(len(msg.get_payload()), 2)

    def test_boundary_without_trailing_newline(self):
        m = Parser().parsestr("""\
Content-Type: multipart/mixed; boundary="===============0012394164=="
MIME-Version: 1.0

--===============0012394164==
Content-Type: image/file1.jpg
MIME-Version: 1.0
Content-Transfer-Encoding: base64

YXNkZg==
--===============0012394164==--""")
        self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001900
1901
Ezio Melottib3aedd42010-11-20 19:04:17 +00001902
# Test some badly formatted messages
class TestNonConformant(TestEmailBase):
    """Parse malformed messages and check the results and recorded defects."""

    def test_parse_missing_minor_type(self):
        msg = self._msgobj('msg_14.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')
        self.assertEqual(msg.get_content_maintype(), 'text')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    # test_defect_handling
    def test_same_boundary_inner_outer(self):
        msg = self._msgobj('msg_15.txt')
        # XXX We can probably eventually do better
        inner = msg.get_payload(0)
        self.assertTrue(hasattr(inner, 'defects'))
        self.assertEqual(len(inner.defects), 1)
        self.assertIsInstance(inner.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    # test_defect_handling
    def test_multipart_no_boundary(self):
        msg = self._msgobj('msg_25.txt')
        # With no boundary to split on, the payload is left as one string.
        self.assertIsInstance(msg.get_payload(), str)
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    # Template shared by the three CTE tests below.  The '{}' placeholder
    # sits at the end of the folded Content-Type header so that a test can
    # append a further header line.  The boundary line is indented relative
    # to the others because it is a continuation of Content-Type and must
    # keep leading whitespace once dedent() has stripped the common margin.
    multipart_msg = textwrap.dedent("""\
        Date: Wed, 14 Nov 2007 12:56:23 GMT
        From: foo@bar.invalid
        To: foo@bar.invalid
        Subject: Content-Transfer-Encoding: base64 and multipart
        MIME-Version: 1.0
        Content-Type: multipart/mixed;
            boundary="===============3344438784458119861=="{}

        --===============3344438784458119861==
        Content-Type: text/plain

        Test message

        --===============3344438784458119861==
        Content-Type: application/octet-stream
        Content-Transfer-Encoding: base64

        YWJj

        --===============3344438784458119861==--
        """)

    # test_defect_handling
    def test_multipart_invalid_cte(self):
        extra_header = "\nContent-Transfer-Encoding: base64"
        msg = self._str_msg(self.multipart_msg.format(extra_header))
        self.assertEqual(len(msg.defects), 1)
        self.assertIsInstance(
            msg.defects[0],
            errors.InvalidMultipartContentTransferEncodingDefect)

    # test_defect_handling
    def test_multipart_no_cte_no_defect(self):
        msg = self._str_msg(self.multipart_msg.format(''))
        self.assertEqual(len(msg.defects), 0)

    # test_defect_handling
    def test_multipart_valid_cte_no_defect(self):
        for cte in ('7bit', '8bit', 'BINary'):
            extra_header = "\nContent-Transfer-Encoding: {}".format(cte)
            msg = self._str_msg(self.multipart_msg.format(extra_header))
            self.assertEqual(len(msg.defects), 0)

    # test_headerregistry.TestContentTypeHeader invalid_1 and invalid_2.
    def test_invalid_content_type(self):
        msg = Message()
        # RFC 2045, $5.2 says invalid yields text/plain
        msg['Content-Type'] = 'text'
        self.assertEqual(msg.get_content_maintype(), 'text')
        self.assertEqual(msg.get_content_subtype(), 'plain')
        self.assertEqual(msg.get_content_type(), 'text/plain')
        # Clear the old value and try something /really/ invalid
        del msg['content-type']
        msg['Content-Type'] = 'foo'
        self.assertEqual(msg.get_content_maintype(), 'text')
        self.assertEqual(msg.get_content_subtype(), 'plain')
        self.assertEqual(msg.get_content_type(), 'text/plain')
        # Still, make sure that the message is idempotently generated
        out = StringIO()
        Generator(out).flatten(msg)
        self.ndiffAssertEqual(out.getvalue(), 'Content-Type: foo\n\n')

    def test_no_start_boundary(self):
        msg = self._msgobj('msg_31.txt')
        self.ndiffAssertEqual(msg.get_payload(), """\
--BOUNDARY
Content-Type: text/plain

message 1

--BOUNDARY
Content-Type: text/plain

message 2

--BOUNDARY--
""")

    def test_no_separating_blank_line(self):
        msg = self._msgobj('msg_35.txt')
        self.ndiffAssertEqual(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: here's something interesting

counter to RFC 2822, there's no separating newline here
""")

    # test_defect_handling
    def test_lying_multipart(self):
        msg = self._msgobj('msg_41.txt')
        self.assertTrue(hasattr(msg, 'defects'))
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    # test_defect_handling
    def test_missing_start_boundary(self):
        outer = self._msgobj('msg_42.txt')
        # The message structure is:
        #
        # multipart/mixed
        #    text/plain
        #    message/rfc822
        #        multipart/mixed [*]
        #
        # [*] This message is missing its start boundary
        bad = outer.get_payload(1).get_payload(0)
        self.assertEqual(len(bad.defects), 1)
        self.assertIsInstance(bad.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    # test_defect_handling
    def test_first_line_is_continuation_header(self):
        m = ' Line 1\nSubject: test\n\nbody'
        msg = email.message_from_string(m)
        self.assertEqual(msg.keys(), ['Subject'])
        self.assertEqual(msg.get_payload(), 'body')
        self.assertEqual(len(msg.defects), 1)
        self.assertDefectsEqual(msg.defects,
                                [errors.FirstHeaderLineIsContinuationDefect])
        self.assertEqual(msg.defects[0].line, ' Line 1\n')

    # test_defect_handling
    def test_missing_header_body_separator(self):
        # Our heuristic if we see a line that doesn't look like a header (no
        # leading whitespace but no ':') is to assume that the blank line that
        # separates the header from the body is missing, and to stop parsing
        # headers and start parsing the body.
        msg = self._str_msg('Subject: test\nnot a header\nTo: abc\n\nb\n')
        self.assertEqual(msg.keys(), ['Subject'])
        self.assertEqual(msg.get_payload(), 'not a header\nTo: abc\n\nb\n')
        self.assertDefectsEqual(msg.defects,
                                [errors.MissingHeaderBodySeparatorDefect])
2079
Ezio Melottib3aedd42010-11-20 19:04:17 +00002080
# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    """Check decode_header()/make_header() against RFC 2047 encoded words."""

    def test_rfc2047_multiline(self):
        eq = self.assertEqual
        s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
        dh = decode_header(s)
        eq(dh, [
            (b'Re: ', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            (b' baz foo bar ', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
        header = make_header(dh)
        eq(str(header),
           'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""")

    def test_whitespace_keeper_unicode(self):
        eq = self.assertEqual
        s = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        dh = decode_header(s)
        eq(dh, [(b'Andr\xe9', 'iso-8859-1'),
                (b' Pirard <pirard@dom.ain>', None)])
        header = str(make_header(dh))
        eq(header, 'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_keeper_unicode_2(self):
        eq = self.assertEqual
        s = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?='
        dh = decode_header(s)
        eq(dh, [(b'The ', None), (b'quick brown fox', 'iso-8859-1'),
                (b' jumped over the ', None), (b'lazy dog', 'iso-8859-1')])
        hu = str(make_header(dh))
        eq(hu, 'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        dh = decode_header(s)
        self.assertEqual(dh, [(b'Sm', None), (b'\xf6', 'iso-8859-1'),
                              (b'rg', None), (b'\xe5', 'iso-8859-1'),
                              (b'sbord', None)])

    def test_rfc2047_with_whitespace(self):
        s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        dh = decode_header(s)
        self.assertEqual(dh, [(b'Sm ', None), (b'\xf6', 'iso-8859-1'),
                              (b' rg ', None), (b'\xe5', 'iso-8859-1'),
                              (b' sbord', None)])

    def test_rfc2047_B_bad_padding(self):
        s = '=?iso-8859-1?B?%s?='
        data = [                                # only test complete bytes
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi')
          ]
        for q, a in data:
            dh = decode_header(s % q)
            self.assertEqual(dh, [(a, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004.
        s = '=?iso-8659-1?Q?andr=e9=zz?='
        self.assertEqual(decode_header(s),
                         [(b'andr\xe9=zz', 'iso-8659-1')])

    def test_rfc2047_rfc2047_1(self):
        # 1st testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_2(self):
        # 2nd testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?= b)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a', 'iso-8859-1'), (b' b)', None)])

    def test_rfc2047_rfc2047_3(self):
        # 3rd testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_4(self):
        # 4th testcase at end of rfc2047.  It differs from the 3rd only in
        # having TWO spaces between the encoded words; the whole run of
        # linear whitespace between adjacent encoded words is ignored.
        s = '(=?ISO-8859-1?Q?a?=  =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_5a(self):
        # 5th testcase at end of rfc2047 newline is \r\n
        s = '(=?ISO-8859-1?Q?a?=\r\n =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_5b(self):
        # 5th testcase at end of rfc2047 newline is \n
        s = '(=?ISO-8859-1?Q?a?=\n =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_6(self):
        # 6th testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a_b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a b', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_7(self):
        # 7th testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a', 'iso-8859-1'), (b' b', 'iso-8859-2'),
             (b')', None)])
        self.assertEqual(make_header(decode_header(s)).encode(), s.lower())
        self.assertEqual(str(make_header(decode_header(s))), '(a b)')

    def test_multiline_header(self):
        s = '=?windows-1252?q?=22M=FCller_T=22?=\r\n <T.Mueller@xxx.com>'
        self.assertEqual(decode_header(s),
            [(b'"M\xfcller T"', 'windows-1252'),
             (b'<T.Mueller@xxx.com>', None)])
        # Re-encoding yields the original text with the line break removed.
        self.assertEqual(make_header(decode_header(s)).encode(),
                         ''.join(s.splitlines()))
        self.assertEqual(str(make_header(decode_header(s))),
                         '"Müller T" <T.Mueller@xxx.com>')
2208
Ezio Melottib3aedd42010-11-20 19:04:17 +00002209
# Test the MIMEMessage class
class TestMIMEMessage(TestEmailBase):
    """Tests for MIMEMessage and for parsing/generating message/* and
    multipart containers, driven by the msg_*.txt sample files.
    """

    def setUp(self):
        # Keep the raw text of the msg_11.txt sample on the instance.
        with openfile('msg_11.txt') as fp:
            self._text = fp.read()

    def _check_digest_default_types(self, filename):
        # Shared body of the two default-type tests below: the first two
        # parts of the parsed file must report message/rfc822 and each
        # part's first sub-part must report text/plain, both as default
        # type and as content type.
        eq = self.assertEqual
        with openfile(filename) as fp:
            msg = email.message_from_file(fp)
        containers = [msg.get_payload(0), msg.get_payload(1)]
        for container in containers:
            eq(container.get_default_type(), 'message/rfc822')
            eq(container.get_content_type(), 'message/rfc822')
        for container in containers:
            inner = container.get_payload(0)
            eq(inner.get_default_type(), 'text/plain')
            eq(inner.get_content_type(), 'text/plain')

    def test_type_error(self):
        # A plain string is not accepted as the wrapped message.
        self.assertRaises(TypeError, MIMEMessage, 'a plain string')

    def test_valid_argument(self):
        eq = self.assertEqual
        subject = 'A sub-message'
        m = Message()
        m['Subject'] = subject
        r = MIMEMessage(m)
        eq(r.get_content_type(), 'message/rfc822')
        payload = r.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subpart = payload[0]
        # The payload must hold the very object that was passed in.
        self.assertIs(subpart, m)
        eq(subpart['subject'], subject)

    def test_bad_multipart(self):
        # Attaching a second message to a MIMEMessage is expected to raise
        # MultipartConversionError.
        msg1 = Message()
        msg1['Subject'] = 'subpart 1'
        msg2 = Message()
        msg2['Subject'] = 'subpart 2'
        r = MIMEMessage(msg1)
        self.assertRaises(errors.MultipartConversionError, r.attach, msg2)

    def test_generate(self):
        # First craft the message to be encapsulated
        m = Message()
        m['Subject'] = 'An enclosed message'
        m.set_payload('Here is the body of the message.\n')
        r = MIMEMessage(m)
        r['Subject'] = 'The enclosing message'
        s = StringIO()
        g = Generator(s)
        g.flatten(r)
        self.assertEqual(s.getvalue(), """\
Content-Type: message/rfc822
MIME-Version: 1.0
Subject: The enclosing message

Subject: An enclosed message

Here is the body of the message.
""")

    def test_parse_message_rfc822(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_11.txt')
        eq(msg.get_content_type(), 'message/rfc822')
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        submsg = payload[0]
        self.assertIsInstance(submsg, Message)
        eq(submsg['subject'], 'An enclosed message')
        eq(submsg.get_payload(), 'Here is the body of the message.\n')

    def test_dsn(self):
        eq = self.assertEqual
        # msg 16 is a Delivery Status Notification, see RFC 1894
        msg = self._msgobj('msg_16.txt')
        eq(msg.get_content_type(), 'multipart/report')
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 3)
        # Subpart 1 is a text/plain, human readable section
        subpart = msg.get_payload(0)
        eq(subpart.get_content_type(), 'text/plain')
        # NOTE(review): the leading whitespace of the indented lines in this
        # literal was collapsed in the reviewed copy; two spaces is assumed.
        # Confirm against msg_16.txt.
        eq(subpart.get_payload(), """\
This report relates to a message you sent with the following header fields:

  Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
  Date: Sun, 23 Sep 2001 20:10:55 -0700
  From: "Ian T. Henry" <henryi@oxy.edu>
  To: SoCal Raves <scr@socal-raves.org>
  Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

  Recipient address: jangel1@cougar.noc.ucla.edu
  Reason: recipient reached disk quota

""")
        # Subpart 2 contains the machine parsable DSN information.  It
        # consists of two blocks of headers, represented by two nested Message
        # objects.
        subpart = msg.get_payload(1)
        eq(subpart.get_content_type(), 'message/delivery-status')
        eq(len(subpart.get_payload()), 2)
        # message/delivery-status should treat each block as a bunch of
        # headers, i.e. a bunch of Message objects.
        dsn1 = subpart.get_payload(0)
        self.assertIsInstance(dsn1, Message)
        eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
        eq(dsn1.get_param('dns', header='reporting-mta'), '')
        # Try a missing one <wink>
        self.assertIsNone(dsn1.get_param('nsd', header='reporting-mta'))
        dsn2 = subpart.get_payload(1)
        self.assertIsInstance(dsn2, Message)
        eq(dsn2['action'], 'failed')
        eq(dsn2.get_params(header='original-recipient'),
           [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
        eq(dsn2.get_param('rfc822', header='final-recipient'), '')
        # Subpart 3 is the original message
        subpart = msg.get_payload(2)
        eq(subpart.get_content_type(), 'message/rfc822')
        payload = subpart.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subsubpart = payload[0]
        self.assertIsInstance(subsubpart, Message)
        eq(subsubpart.get_content_type(), 'text/plain')
        eq(subsubpart['message-id'],
           '<002001c144a6$8752e060$56104586@oxy.edu>')

    def test_epilogue(self):
        # A hand-built multipart with preamble and epilogue must flatten to
        # exactly the text stored in msg_21.txt.
        eq = self.ndiffAssertEqual
        with openfile('msg_21.txt') as fp:
            text = fp.read()
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = 'End of MIME message\n'
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), text)

    def test_no_nl_preamble(self):
        # The preamble has no trailing newline and the epilogue is empty.
        eq = self.ndiffAssertEqual
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = ''
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
""")

    def test_default_type(self):
        self._check_digest_default_types('msg_30.txt')

    def test_default_type_with_explicit_container_type(self):
        self._check_digest_default_types('msg_28.txt')

    def test_default_type_non_parsed(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # Set up container
        container = MIMEMultipart('digest', 'BOUNDARY')
        container.epilogue = ''
        # Set up subparts
        subpart1a = MIMEText('message 1\n')
        subpart2a = MIMEText('message 2\n')
        subpart1 = MIMEMessage(subpart1a)
        subpart2 = MIMEMessage(subpart2a)
        container.attach(subpart1)
        container.attach(subpart2)
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
        # With the explicit headers removed, the reported types must not
        # change, but the headers disappear from the flattened output.
        del subpart1['content-type']
        del subpart1['mime-version']
        del subpart2['content-type']
        del subpart2['mime-version']
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')

    def test_mime_attachments_in_constructor(self):
        eq = self.assertEqual
        text1 = MIMEText('')
        text2 = MIMEText('')
        msg = MIMEMultipart(_subparts=(text1, text2))
        eq(len(msg.get_payload()), 2)
        eq(msg.get_payload(0), text1)
        eq(msg.get_payload(1), text2)

    def test_default_multipart_constructor(self):
        msg = MIMEMultipart()
        self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002509
Ezio Melottib3aedd42010-11-20 19:04:17 +00002510
# A general test of parser->model->generator idempotency.  IOW, read a message
# in, parse it into a message object tree, then without touching the tree,
# regenerate the plain text.  The original text and the transformed text
# should be identical.  Note that we ignore the Unix-From since that may
# contain a changed date.
class TestIdempotent(TestEmailBase):
    """Parse each sample message and check that flattening the untouched
    object tree reproduces the original text.
    """

    # Line separator expected in parsed payloads, preambles and epilogues.
    linesep = '\n'

    def _msgobj(self, filename):
        """Return a (parsed message, raw text) pair for the sample file."""
        with openfile(filename) as fp:
            data = fp.read()
        msg = email.message_from_string(data)
        return msg, data

    def _idempotent(self, msg, text, unixfrom=False):
        """Flatten msg with maxheaderlen=0 and compare the result to text."""
        eq = self.ndiffAssertEqual
        s = StringIO()
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg, unixfrom=unixfrom)
        eq(text, s.getvalue())

    def test_parse_text_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_01.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_params()[1], ('charset', 'us-ascii'))
        eq(msg.get_param('charset'), 'us-ascii')
        self.assertIsNone(msg.preamble)
        self.assertIsNone(msg.epilogue)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_03.txt')
        eq(msg.get_content_type(), 'text/plain')
        self.assertIsNone(msg.get_params())
        self.assertIsNone(msg.get_param('charset'))
        self._idempotent(msg, text)

    def test_simple_multipart(self):
        msg, text = self._msgobj('msg_04.txt')
        self._idempotent(msg, text)

    def test_MIME_digest(self):
        msg, text = self._msgobj('msg_02.txt')
        self._idempotent(msg, text)

    def test_long_header(self):
        msg, text = self._msgobj('msg_27.txt')
        self._idempotent(msg, text)

    def test_MIME_digest_with_part_headers(self):
        msg, text = self._msgobj('msg_28.txt')
        self._idempotent(msg, text)

    def test_mixed_with_image(self):
        msg, text = self._msgobj('msg_06.txt')
        self._idempotent(msg, text)

    def test_multipart_report(self):
        msg, text = self._msgobj('msg_05.txt')
        self._idempotent(msg, text)

    def test_dsn(self):
        msg, text = self._msgobj('msg_16.txt')
        self._idempotent(msg, text)

    def test_preamble_epilogue(self):
        msg, text = self._msgobj('msg_21.txt')
        self._idempotent(msg, text)

    def test_multipart_one_part(self):
        msg, text = self._msgobj('msg_23.txt')
        self._idempotent(msg, text)

    def test_multipart_no_parts(self):
        msg, text = self._msgobj('msg_24.txt')
        self._idempotent(msg, text)

    def test_no_start_boundary(self):
        msg, text = self._msgobj('msg_31.txt')
        self._idempotent(msg, text)

    def test_rfc2231_charset(self):
        msg, text = self._msgobj('msg_32.txt')
        self._idempotent(msg, text)

    def test_more_rfc2231_parameters(self):
        msg, text = self._msgobj('msg_33.txt')
        self._idempotent(msg, text)

    def test_text_plain_in_a_multipart_digest(self):
        msg, text = self._msgobj('msg_34.txt')
        self._idempotent(msg, text)

    def test_nested_multipart_mixeds(self):
        msg, text = self._msgobj('msg_12a.txt')
        self._idempotent(msg, text)

    def test_message_external_body_idempotent(self):
        msg, text = self._msgobj('msg_36.txt')
        self._idempotent(msg, text)

    def test_message_delivery_status(self):
        # This sample is compared with the Unix-From line included.
        msg, text = self._msgobj('msg_43.txt')
        self._idempotent(msg, text, unixfrom=True)

    def test_message_signed_idempotent(self):
        msg, text = self._msgobj('msg_45.txt')
        self._idempotent(msg, text)

    def test_content_type(self):
        eq = self.assertEqual
        msg, _ = self._msgobj('msg_05.txt')
        eq(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(msg.get_params())
        eq(params['report-type'], 'delivery-status')
        eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep)
        eq(msg.epilogue, self.linesep)
        eq(len(msg.get_payload()), 3)
        # Make sure the subparts are what we expect
        msg1 = msg.get_payload(0)
        eq(msg1.get_content_type(), 'text/plain')
        eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg2 = msg.get_payload(1)
        eq(msg2.get_content_type(), 'text/plain')
        eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg3 = msg.get_payload(2)
        eq(msg3.get_content_type(), 'message/rfc822')
        self.assertIsInstance(msg3, Message)
        payload = msg3.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg4 = payload[0]
        self.assertIsInstance(msg4, Message)
        eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep)

    def test_parser(self):
        eq = self.assertEqual
        msg, _ = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        eq(msg.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and that
        # that submessage has a type of text/plain
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg1 = payload[0]
        self.assertIsInstance(msg1, Message)
        eq(msg1.get_content_type(), 'text/plain')
        self.assertIsInstance(msg1.get_payload(), str)
        eq(msg1.get_payload(), self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002673
2674
Ezio Melottib3aedd42010-11-20 19:04:17 +00002675
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002676# Test various other bits of the package's functionality
2677class TestMiscellaneous(TestEmailBase):
def test_message_from_string(self):
    """Parse msg_01.txt from a string and check it flattens unchanged."""
    with openfile('msg_01.txt') as source:
        original = source.read()
    parsed = email.message_from_string(original)
    out = StringIO()
    # maxheaderlen=0: don't wrap/continue long headers, since this is an
    # idempotency check.
    Generator(out, maxheaderlen=0).flatten(parsed)
    self.assertEqual(original, out.getvalue())
2688
def test_message_from_file(self):
    """Parse msg_01.txt from a file object and check it flattens unchanged."""
    with openfile('msg_01.txt') as source:
        original = source.read()
        # Rewind so the parser reads the file from the start again.
        source.seek(0)
        parsed = email.message_from_file(source)
    out = StringIO()
    # maxheaderlen=0: don't wrap/continue long headers, since this is an
    # idempotency check.
    Generator(out, maxheaderlen=0).flatten(parsed)
    self.assertEqual(original, out.getvalue())
2700
def test_message_from_string_with_class(self):
    """message_from_string() must build every part from the given class."""
    # Create a subclass
    class MyMessage(Message):
        pass

    with openfile('msg_01.txt') as fp:
        text = fp.read()
    msg = email.message_from_string(text, MyMessage)
    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)).
    self.assertIsInstance(msg, MyMessage)
    # Try something more complicated
    with openfile('msg_02.txt') as fp:
        text = fp.read()
    msg = email.message_from_string(text, MyMessage)
    for subpart in msg.walk():
        self.assertIsInstance(subpart, MyMessage)
2718
def test_message_from_file_with_class(self):
    """message_from_file() must build every part from the given class."""
    # Create a subclass
    class MyMessage(Message):
        pass

    with openfile('msg_01.txt') as fp:
        msg = email.message_from_file(fp, MyMessage)
    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)).
    self.assertIsInstance(msg, MyMessage)
    # Try something more complicated
    with openfile('msg_02.txt') as fp:
        msg = email.message_from_file(fp, MyMessage)
    for subpart in msg.walk():
        self.assertIsInstance(subpart, MyMessage)
2733
def test_custom_message_does_not_require_arguments(self):
    """A Message subclass whose __init__ takes no arguments can be used."""
    class MyMessage(Message):
        def __init__(self):
            super().__init__()

    msg = self._str_msg("Subject: test\n\ntest", MyMessage)
    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)).
    self.assertIsInstance(msg, MyMessage)
2740
def test__all__(self):
    """The email package must export exactly the expected public names."""
    expected = [
        'base64mime', 'charset', 'encoders', 'errors', 'feedparser',
        'generator', 'header', 'iterators', 'message',
        'message_from_binary_file', 'message_from_bytes',
        'message_from_file', 'message_from_string', 'mime', 'parser',
        'quoprimime', 'utils',
    ]
    package = __import__('email')
    exported = sorted(package.__all__)
    self.assertEqual(exported, expected)
2750
def test_formatdate(self):
    """formatdate() output parses back to the same gmtime() fields."""
    stamp = time.time()
    formatted = utils.formatdate(stamp)
    parsed = utils.parsedate(formatted)
    self.assertEqual(parsed[:6], time.gmtime(stamp)[:6])
2755
def test_formatdate_localtime(self):
    """With localtime=True the output parses back to localtime() fields."""
    stamp = time.time()
    formatted = utils.formatdate(stamp, localtime=True)
    parsed = utils.parsedate(formatted)
    self.assertEqual(parsed[:6], time.localtime(stamp)[:6])
2761
def test_formatdate_usegmt(self):
    """Check the zone suffix of formatdate(): '-0000' by default, 'GMT'
    when usegmt=True.
    """
    eq = self.assertEqual
    stamp = time.time()
    utc = time.gmtime(stamp)
    eq(utils.formatdate(stamp, localtime=False),
       time.strftime('%a, %d %b %Y %H:%M:%S -0000', utc))
    eq(utils.formatdate(stamp, localtime=False, usegmt=True),
       time.strftime('%a, %d %b %Y %H:%M:%S GMT', utc))
2770
Georg Brandl1aca31e2012-09-22 09:03:56 +02002771 # parsedate and parsedate_tz will become deprecated interfaces someday
def test_parsedate_returns_None_for_invalid_strings(self):
    """parsedate() and parsedate_tz() return None for unparsable input."""
    # None is not part of the spec, but it has historically worked, so it
    # is checked last alongside the invalid strings.
    for bogus in ('', '0', 'A Complete Waste of Time', None):
        self.assertIsNone(utils.parsedate(bogus))
        self.assertIsNone(utils.parsedate_tz(bogus))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002782
def test_parsedate_compact(self):
    """The folding white space after the day-of-week comma is optional."""
    compact = utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800')
    spaced = utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800')
    self.assertEqual(compact, spaced)
2787
def test_parsedate_no_dayofweek(self):
    """A date without a leading day-of-week is parsed."""
    parsed = utils.parsedate_tz('25 Feb 2003 13:47:26 -0800')
    expected = (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800)
    self.assertEqual(parsed, expected)
2792
def test_parsedate_compact_no_dayofweek(self):
    """A single-digit day without a day-of-week is parsed."""
    parsed = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
    expected = (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800)
    self.assertEqual(parsed, expected)
2797
def test_parsedate_no_space_before_positive_offset(self):
    """A '+' offset glued directly to the time is still recognised."""
    parsed = utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800')
    expected = (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800)
    self.assertEqual(parsed, expected)
2801
def test_parsedate_no_space_before_negative_offset(self):
    """A '-' offset glued directly to the time is still recognised."""
    # Issue 1155362: we already handled '+' for this case.
    parsed = utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800')
    expected = (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800)
    self.assertEqual(parsed, expected)
2806
2807
def test_parsedate_accepts_time_with_dots(self):
    """Times written with '.' separators parse, with or without seconds."""
    cases = [
        ('5 Feb 2003 13.47.26 -0800',
         (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800)),
        ('5 Feb 2003 13.47 -0800',
         (2003, 2, 5, 13, 47, 0, 0, 1, -1, -28800)),
    ]
    for text, expected in cases:
        self.assertEqual(utils.parsedate_tz(text), expected)
2814
def test_parsedate_acceptable_to_time_functions(self):
    """Results of parsedate()/parsedate_tz() are usable by the time module."""
    eq = self.assertEqual
    # parsedate() output is passed to mktime()/strftime() as is.
    fields = utils.parsedate('5 Feb 2003 13:47:26 -0800')
    seconds = int(time.mktime(fields))
    eq(time.localtime(seconds)[:6], fields[:6])
    eq(int(time.strftime('%Y', fields)), 2003)
    # parsedate_tz() output is trimmed to its first nine items first.
    fields_tz = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
    seconds = int(time.mktime(fields_tz[:9]))
    eq(time.localtime(seconds)[:6], fields_tz[:6])
    eq(int(time.strftime('%Y', fields_tz[:9])), 2003)
2825
def test_mktime_tz(self):
    """mktime_tz() subtracts the tenth tuple item from the epoch seconds."""
    eq = self.assertEqual
    at_utc = (1970, 1, 1, 0, 0, 0, -1, -1, -1, 0)
    at_offset = (1970, 1, 1, 0, 0, 0, -1, -1, -1, 1234)
    eq(utils.mktime_tz(at_utc), 0)
    eq(utils.mktime_tz(at_offset), -1234)
2831
def test_parsedate_y2k(self):
    """Check that two-digit years are expanded to four digits.

    RFC822 allows two-digit years, whereas RFC2822 (which obsoletes
    RFC822) requires four-digit years; a two-digit year must parse to the
    same result as its four-digit spelling.

    """
    pairs = [
        ('25 Feb 03 13:47:26 -0800', '25 Feb 2003 13:47:26 -0800'),
        ('25 Feb 71 13:47:26 -0800', '25 Feb 1971 13:47:26 -0800'),
    ]
    for short_year, full_year in pairs:
        self.assertEqual(utils.parsedate_tz(short_year),
                         utils.parsedate_tz(full_year))
2844
def test_parseaddr_empty(self):
    """'<>' parses to an empty pair, which formats to an empty string."""
    eq = self.assertEqual
    eq(utils.parseaddr('<>'), ('', ''))
    reformatted = utils.formataddr(utils.parseaddr('<>'))
    eq(reformatted, '')
2848
def test_noquote_dump(self):
    """A display name without special characters is left unquoted."""
    formatted = utils.formataddr(('A Silly Person', 'person@dom.ain'))
    self.assertEqual(formatted, 'A Silly Person <person@dom.ain>')
2853
def test_escape_dump(self):
    """Parentheses in a display name: formatting, parsing and round trip."""
    eq = self.assertEqual
    mailbox = 'person@dom.ain'
    eq(utils.formataddr(('A (Very) Silly Person', mailbox)),
       r'"A (Very) Silly Person" <person@dom.ain>')
    eq(utils.parseaddr(r'"A \(Very\) Silly Person" <person@dom.ain>'),
       ('A (Very) Silly Person', mailbox))
    # A name that already contains backslashes must survive a round trip.
    pair = (r'A \(Special\) Person', mailbox)
    eq(utils.parseaddr(utils.formataddr(pair)), pair)
2864
def test_escape_backslashes(self):
    """formataddr() doubles backslashes in the name; parseaddr() undoes it."""
    # Raw strings throughout: '\B' and '\ ' are not valid escape sequences,
    # so writing them in a normal literal triggers an invalid-escape warning
    # at compile time.  The raw literal has the same value.
    a = r'Arthur \Backslash\ Foobar'
    b = 'person@dom.ain'
    self.assertEqual(
        utils.formataddr((a, b)),
        r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
    self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2872
def test_quotes_unicode_names(self):
    """A non-ASCII display name is RFC 2047 encoded by formataddr()."""
    # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
    eq = self.assertEqual
    pair = ("H\u00e4ns W\u00fcrst", 'person@dom.ain')
    # With no charset argument the expected output is utf-8/base64.
    eq(utils.formataddr(pair),
       "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>")
    eq(utils.formataddr(pair, 'iso-8859-1'),
       "=?iso-8859-1?q?H=E4ns_W=FCrst?= <person@dom.ain>")
2882
def test_accepts_any_charset_like_object(self):
    """formataddr() accepts any charset object with header_encode()."""
    # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
    eq = self.assertEqual
    pair = ("H\u00e4ns W\u00fcrst", 'person@dom.ain')
    marker = "FOOBAR"

    class CharsetMock:
        # Stand-in charset: always encodes to the fixed marker string.
        def header_encode(self, string):
            return marker

    eq(utils.formataddr(pair, CharsetMock()),
       "%s <%s>" % (marker, pair[1]))
    eq(utils.formataddr(pair, Charset('utf-8')),
       "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>")
2897
def test_invalid_charset_like_object_raises_error(self):
    """A charset argument lacking header_encode() raises AttributeError."""
    # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
    pair = ("H\u00e4ns W\u00fcrst", 'person@dom.ain')
    # An object without a header_encode method:
    no_header_encode = object()
    with self.assertRaises(AttributeError):
        utils.formataddr(pair, no_header_encode)
2906
def test_unicode_address_raises_error(self):
    """A non-ASCII address raises UnicodeError, with or without a name."""
    # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
    bad_addr = 'pers\u00f6n@dom.in'
    for display_name in (None, "Name"):
        with self.assertRaises(UnicodeError):
            utils.formataddr((display_name, bad_addr))
2912
def test_name_with_dot(self):
    """A dotted display name parses quoted or bare, and formats quoted."""
    eq = self.assertEqual
    expected = ('John X. Doe', 'jxd@example.com')
    bare = 'John X. Doe <jxd@example.com>'
    quoted = '"John X. Doe" <jxd@example.com>'
    eq(utils.parseaddr(bare), expected)
    eq(utils.parseaddr(quoted), expected)
    # formataddr() quotes the name if there's a dot in it
    eq(utils.formataddr(expected), quoted)
2921
def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
    """Quoted pairs inside an address come back from parseaddr() verbatim."""
    # issue 10005.  Note that in the third case the second pair of
    # backslashes is not actually a quoted pair because it is not inside a
    # comment or quoted string: the address being parsed has a quoted
    # string containing a quoted backslash, followed by 'example' and two
    # backslashes, followed by another quoted string containing a space and
    # the word 'example'.  parseaddr copies those two backslashes
    # literally.  Per rfc5322 this is not technically correct since a \ may
    # not appear in an address outside of a quoted string.  It is probably
    # a sensible Postel interpretation, though.
    addresses = (
        '""example" example"@example.com',
        '"\\"example\\" example"@example.com',
        '"\\\\"example\\\\" example"@example.com',
    )
    for address in addresses:
        self.assertEqual(utils.parseaddr(address), ('', address))
2939
def test_parseaddr_preserves_spaces_in_local_part(self):
    """Spaces between local-part atoms are kept; those around '@' and '.'
    are stripped.
    """
    # issue 9286.  A normal RFC5322 local part should not contain any
    # folding white space, but legacy local parts can (they are a sequence
    # of atoms, not dotatoms).  On the other hand we strip whitespace from
    # before the @ and around dots, on the assumption that the whitespace
    # around the punctuation is a mistake in what would otherwise be
    # an RFC5322 local part.  Leading whitespace is, as usual, stripped as
    # well.
    # NOTE(review): the first two cases are identical as seen in the
    # reviewed copy; runs of several spaces inside these literals may have
    # been collapsed -- confirm against the canonical file.
    cases = [
        ("merwok wok@xample.com", "merwok wok@xample.com"),
        ("merwok wok@xample.com", "merwok wok@xample.com"),
        (" merwok wok @xample.com", "merwok wok@xample.com"),
        ('merwok"wok" wok@xample.com', 'merwok"wok" wok@xample.com'),
        ('merwok. wok . wok@xample.com', 'merwok.wok.wok@xample.com'),
    ]
    for given, expected_addr in cases:
        self.assertEqual(('', expected_addr), utils.parseaddr(given))
2957
def test_formataddr_does_not_quote_parens_in_quoted_string(self):
    """Parentheses inside a quoted display name are not backslash-escaped."""
    eq = self.assertEqual
    pair = ("'foo@example.com' (foo@example.com)", 'foo@example.com')
    text = '"\'foo@example.com\' (foo@example.com)" <foo@example.com>'
    eq(utils.parseaddr(text), pair)
    eq(utils.formataddr(pair), text)
2965
2966
def test_multiline_from_comment(self):
    """A display name split over a newline plus tab parses to one name."""
    folded = 'Foo\n\tBar <foo@example.com>'
    parsed = utils.parseaddr(folded)
    self.assertEqual(parsed, ('Foo Bar', 'foo@example.com'))
2972
def test_quote_dump(self):
    """A display name containing ';' is wrapped in double quotes."""
    formatted = utils.formataddr(('A Silly; Person', 'person@dom.ain'))
    self.assertEqual(formatted, r'"A Silly; Person" <person@dom.ain>')
2977
def test_charset_richcomparisons(self):
    """Charset compares case-insensitively with strings, in both operand
    orders, and with other Charset objects.
    """
    eq = self.assertEqual
    ne = self.assertNotEqual
    cset1 = Charset()
    cset2 = Charset()
    matching = ('us-ascii', 'US-ASCII', 'Us-AsCiI')
    differing = ('usascii', 'USASCII', 'UsAsCiI')
    for name in matching:
        eq(cset1, name)
    for name in matching:
        eq(name, cset1)
    for name in differing:
        ne(cset1, name)
    for name in differing:
        ne(name, cset1)
    eq(cset1, cset2)
    eq(cset2, cset1)
2997
def test_getaddresses(self):
    """getaddresses() handles both 'addr (Name)' and 'Name <addr>' forms."""
    header_values = ['aperson@dom.ain (Al Person)',
                     'Bud Person <bperson@dom.ain>']
    expected = [('Al Person', 'aperson@dom.ain'),
                ('Bud Person', 'bperson@dom.ain')]
    self.assertEqual(utils.getaddresses(header_values), expected)
3004
def test_getaddresses_nasty(self):
    """getaddresses() copes with empty groups and junk input."""
    cases = [
        (['foo: ;'], [('', '')]),
        (['[]*-- =~$'], [('', ''), ('', ''), ('', '*--')]),
        (['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>'],
         [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]),
    ]
    for header_values, expected in cases:
        self.assertEqual(utils.getaddresses(header_values), expected)
3014
def test_getaddresses_embedded_comment(self):
    """Test proper handling of a nested comment"""
    pairs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
    first_address = pairs[0][1]
    self.assertEqual(first_address, 'foo@bar.com')
3020
3021 def test_utils_quote_unquote(self):
3022 eq = self.assertEqual
3023 msg = Message()
3024 msg.add_header('content-disposition', 'attachment',
3025 filename='foo\\wacky"name')
3026 eq(msg.get_filename(), 'foo\\wacky"name')
3027
3028 def test_get_body_encoding_with_bogus_charset(self):
3029 charset = Charset('not a charset')
3030 self.assertEqual(charset.get_body_encoding(), 'base64')
3031
3032 def test_get_body_encoding_with_uppercase_charset(self):
3033 eq = self.assertEqual
3034 msg = Message()
3035 msg['Content-Type'] = 'text/plain; charset=UTF-8'
3036 eq(msg['content-type'], 'text/plain; charset=UTF-8')
3037 charsets = msg.get_charsets()
3038 eq(len(charsets), 1)
3039 eq(charsets[0], 'utf-8')
3040 charset = Charset(charsets[0])
3041 eq(charset.get_body_encoding(), 'base64')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00003042 msg.set_payload(b'hello world', charset=charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003043 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
3044 eq(msg.get_payload(decode=True), b'hello world')
3045 eq(msg['content-transfer-encoding'], 'base64')
3046 # Try another one
3047 msg = Message()
3048 msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
3049 charsets = msg.get_charsets()
3050 eq(len(charsets), 1)
3051 eq(charsets[0], 'us-ascii')
3052 charset = Charset(charsets[0])
3053 eq(charset.get_body_encoding(), encoders.encode_7or8bit)
3054 msg.set_payload('hello world', charset=charset)
3055 eq(msg.get_payload(), 'hello world')
3056 eq(msg['content-transfer-encoding'], '7bit')
3057
3058 def test_charsets_case_insensitive(self):
3059 lc = Charset('us-ascii')
3060 uc = Charset('US-ASCII')
3061 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
3062
3063 def test_partial_falls_inside_message_delivery_status(self):
3064 eq = self.ndiffAssertEqual
3065 # The Parser interface provides chunks of data to FeedParser in 8192
3066 # byte gulps. SF bug #1076485 found one of those chunks inside
3067 # message/delivery-status header block, which triggered an
3068 # unreadline() of NeedMoreData.
3069 msg = self._msgobj('msg_43.txt')
3070 sfp = StringIO()
3071 iterators._structure(msg, sfp)
3072 eq(sfp.getvalue(), """\
3073multipart/report
3074 text/plain
3075 message/delivery-status
3076 text/plain
3077 text/plain
3078 text/plain
3079 text/plain
3080 text/plain
3081 text/plain
3082 text/plain
3083 text/plain
3084 text/plain
3085 text/plain
3086 text/plain
3087 text/plain
3088 text/plain
3089 text/plain
3090 text/plain
3091 text/plain
3092 text/plain
3093 text/plain
3094 text/plain
3095 text/plain
3096 text/plain
3097 text/plain
3098 text/plain
3099 text/plain
3100 text/plain
3101 text/plain
3102 text/rfc822-headers
3103""")
3104
R. David Murraya0b44b52010-12-02 21:47:19 +00003105 def test_make_msgid_domain(self):
3106 self.assertEqual(
3107 email.utils.make_msgid(domain='testdomain-string')[-19:],
3108 '@testdomain-string>')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003109
R David Murraye67c6c52013-03-07 16:38:03 -05003110 def test_Generator_linend(self):
3111 # Issue 14645.
3112 with openfile('msg_26.txt', newline='\n') as f:
3113 msgtxt = f.read()
3114 msgtxt_nl = msgtxt.replace('\r\n', '\n')
3115 msg = email.message_from_string(msgtxt)
3116 s = StringIO()
3117 g = email.generator.Generator(s)
3118 g.flatten(msg)
3119 self.assertEqual(s.getvalue(), msgtxt_nl)
3120
3121 def test_BytesGenerator_linend(self):
3122 # Issue 14645.
3123 with openfile('msg_26.txt', newline='\n') as f:
3124 msgtxt = f.read()
3125 msgtxt_nl = msgtxt.replace('\r\n', '\n')
3126 msg = email.message_from_string(msgtxt_nl)
3127 s = BytesIO()
3128 g = email.generator.BytesGenerator(s)
3129 g.flatten(msg, linesep='\r\n')
3130 self.assertEqual(s.getvalue().decode('ascii'), msgtxt)
3131
3132 def test_BytesGenerator_linend_with_non_ascii(self):
3133 # Issue 14645.
3134 with openfile('msg_26.txt', 'rb') as f:
3135 msgtxt = f.read()
3136 msgtxt = msgtxt.replace(b'with attachment', b'fo\xf6')
3137 msgtxt_nl = msgtxt.replace(b'\r\n', b'\n')
3138 msg = email.message_from_bytes(msgtxt_nl)
3139 s = BytesIO()
3140 g = email.generator.BytesGenerator(s)
3141 g.flatten(msg, linesep='\r\n')
3142 self.assertEqual(s.getvalue(), msgtxt)
3143
Ezio Melottib3aedd42010-11-20 19:04:17 +00003144
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003145# Test the iterator/generators
class TestIterators(TestEmailBase):
    # Tests for the helpers in email.iterators, plus one regression test for
    # the feed parser's line buffer (test_pushCR_LF).

    def test_body_line_iterator(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # First a simple non-multipart message
        msg = self._msgobj('msg_01.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 6)
        neq(EMPTYSTRING.join(lines), msg.get_payload())
        # Now a more complicated multipart
        msg = self._msgobj('msg_02.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 43)
        with openfile('msg_19.txt') as fp:
            neq(EMPTYSTRING.join(lines), fp.read())

    def test_typed_subpart_iterator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_04.txt')
        # Only a main type is given, so every text/* subpart is selected.
        payloads = [subpart.get_payload()
                    for subpart in iterators.typed_subpart_iterator(msg, 'text')]
        eq(len(payloads), 2)
        eq(EMPTYSTRING.join(payloads),
           'a simple kind of mirror\n'
           'to reflect upon our own\n'
           'a simple kind of mirror\n'
           'to reflect upon our own\n')

    def test_typed_subpart_iterator_default_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_03.txt')
        payloads = [subpart.get_payload()
                    for subpart in iterators.typed_subpart_iterator(
                        msg, 'text', 'plain')]
        eq(len(payloads), 1)
        eq(EMPTYSTRING.join(payloads),
           '\n'
           'Hi,\n'
           '\n'
           'Do you like this message?\n'
           '\n'
           '-Me\n')

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
           line endings.  A CR ending one push() followed by a LF starting
           the next push() added an empty line.
        '''
        # Each entry is (chunk pushed, number of complete lines that should
        # become readable immediately after that push).
        imt = [
            ("a\r \n", 2),
            ("b", 0),
            ("c\n", 1),
            ("", 0),
            ("d\r\n", 1),
            ("e\r", 0),
            ("\nf", 1),
            ("\r\n", 1),
        ]
        from email.feedparser import BufferedSubFile, NeedMoreData
        bsf = BufferedSubFile()
        om = []
        nt = 0
        for il, n in imt:
            bsf.push(il)
            nt += n
            n1 = 0
            # Drain every line that is available so far; readline() hands
            # back the NeedMoreData sentinel once the buffer is exhausted.
            for ol in iter(bsf.readline, NeedMoreData):
                om.append(ol)
                n1 += 1
            # assertEqual (rather than assertTrue(a == b)) reports both
            # values when the counts disagree.
            self.assertEqual(n, n1)
        self.assertEqual(len(om), nt)
        # Nothing may be lost or invented: the lines read back must
        # concatenate to exactly what was pushed.
        self.assertEqual(''.join([il for il, n in imt]), ''.join(om))
3232
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003233
Ezio Melottib3aedd42010-11-20 19:04:17 +00003234
class TestParsers(TestEmailBase):
    # Parser-level tests: header-only parsing, whitespace-only continuation
    # lines, CRLF handling and RFC 2822 header-name syntax.

    # Show complete diffs when whole message texts are compared.
    maxDiff = None

    def test_header_parser(self):
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt') as fp:
            msg = HeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        # The body is left unparsed, so it is one string, not a part list.
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)

    def test_bytes_header_parser(self):
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt', 'rb') as fp:
            msg = email.parser.BytesHeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)
        self.assertIsInstance(msg.get_payload(decode=True), bytes)

    def test_whitespace_continuation(self):
        eq = self.assertEqual
        # This message contains a line after the Subject: header that has only
        # whitespace, but it is not empty!
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: the next line has a space on it
\x20
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        eq = self.assertEqual
        # Like the previous test, but the subject line is the last
        # header.
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam
Subject: the next line has a space on it
\x20

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        eq = self.assertEqual
        # newline='\n' keeps the CRLFs of the fixture in the parsed text.
        with openfile('msg_26.txt', newline='\n') as fp:
            msg = Parser().parse(fp)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'text/plain')
        eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'application/riscos')

    def test_crlf_flatten(self):
        # Using newline='\n' preserves the crlfs in this input file.
        with openfile('msg_26.txt', newline='\n') as fp:
            text = fp.read()
        msg = email.message_from_string(text)
        s = StringIO()
        g = Generator(s)
        g.flatten(msg, linesep='\r\n')
        self.assertEqual(s.getvalue(), text)

    def test_multipart_digest_with_extra_mime_headers(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        eq(msg.is_multipart(), 1)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'message/rfc822')
        eq(part1.is_multipart(), 1)
        eq(len(part1.get_payload()), 1)
        part1a = part1.get_payload(0)
        eq(part1a.is_multipart(), 0)
        eq(part1a.get_content_type(), 'text/plain')
        neq(part1a.get_payload(), 'message 1\n')
        # next message/rfc822
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'message/rfc822')
        eq(part2.is_multipart(), 1)
        eq(len(part2.get_payload()), 1)
        part2a = part2.get_payload(0)
        eq(part2a.is_multipart(), 0)
        eq(part2a.get_content_type(), 'text/plain')
        neq(part2a.get_payload(), 'message 2\n')

    def test_three_lines(self):
        # A bug report by Andrew McNamara
        # The From: value is deliberately missing its closing '>' and the
        # text has no trailing newline; the last header must still parse.
        lines = ['From: Andrew Person <aperson@dom.ain',
                 'Subject: Test',
                 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
        msg = email.message_from_string(NL.join(lines))
        self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        eq = self.assertEqual
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        value1 = 'text'
        value2 = 'more text'
        m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            value1, value2)
        msg = email.message_from_string(m)
        eq(msg.get('Header'), value1)
        eq(msg.get('Next-Header'), value2)

    def test_rfc2822_header_syntax(self):
        eq = self.assertEqual
        # Header names may contain printable punctuation such as '>' or '"'.
        m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg), 3)
        eq(sorted(field for field in msg), ['!"#QUX;~', '>From', 'From'])
        eq(msg.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        eq = self.assertEqual
        # The first line has a space before any colon, so it is not a header
        # and no headers at all are expected to be recognised.
        m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg.keys()), 0)

    def test_rfc2822_one_character_header(self):
        eq = self.assertEqual
        m = 'A: first header\nB: second header\nCC: third header\n\nbody'
        msg = email.message_from_string(m)
        # sorted() works on whatever keys() returns and leaves it unmodified.
        eq(sorted(msg.keys()), ['A', 'B', 'CC'])
        eq(msg.get_payload(), 'body')

    def test_CRLFLF_at_end_of_part(self):
        # issue 5610: feedparser should not eat two chars from body part ending
        # with "\r\n\n".
        m = (
            "From: foo@bar.com\n"
            "To: baz\n"
            "Mime-Version: 1.0\n"
            "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
            "\n"
            "--BOUNDARY\n"
            "Content-Type: text/plain\n"
            "\n"
            "body ending with CRLF newline\r\n"
            "\n"
            "--BOUNDARY--\n"
        )
        msg = email.message_from_string(m)
        self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003410
Ezio Melottib3aedd42010-11-20 19:04:17 +00003411
R. David Murray96fd54e2010-10-08 15:55:28 +00003412class Test8BitBytesHandling(unittest.TestCase):
3413 # In Python3 all input is string, but that doesn't work if the actual input
3414 # uses an 8bit transfer encoding. To hack around that, in email 5.1 we
3415 # decode byte streams using the surrogateescape error handler, and
3416 # reconvert to binary at appropriate places if we detect surrogates. This
3417 # doesn't allow us to transform headers with 8bit bytes (they get munged),
3418 # but it does allow us to parse and preserve them, and to decode body
3419 # parts that use an 8bit CTE.
3420
3421 bodytest_msg = textwrap.dedent("""\
3422 From: foo@bar.com
3423 To: baz
3424 Mime-Version: 1.0
3425 Content-Type: text/plain; charset={charset}
3426 Content-Transfer-Encoding: {cte}
3427
3428 {bodyline}
3429 """)
3430
3431 def test_known_8bit_CTE(self):
3432 m = self.bodytest_msg.format(charset='utf-8',
3433 cte='8bit',
3434 bodyline='pöstal').encode('utf-8')
3435 msg = email.message_from_bytes(m)
3436 self.assertEqual(msg.get_payload(), "pöstal\n")
3437 self.assertEqual(msg.get_payload(decode=True),
3438 "pöstal\n".encode('utf-8'))
3439
3440 def test_unknown_8bit_CTE(self):
3441 m = self.bodytest_msg.format(charset='notavalidcharset',
3442 cte='8bit',
3443 bodyline='pöstal').encode('utf-8')
3444 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00003445 self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n")
R. David Murray96fd54e2010-10-08 15:55:28 +00003446 self.assertEqual(msg.get_payload(decode=True),
3447 "pöstal\n".encode('utf-8'))
3448
3449 def test_8bit_in_quopri_body(self):
3450 # This is non-RFC compliant data...without 'decode' the library code
3451 # decodes the body using the charset from the headers, and because the
3452 # source byte really is utf-8 this works. This is likely to fail
3453 # against real dirty data (ie: produce mojibake), but the data is
3454 # invalid anyway so it is as good a guess as any. But this means that
3455 # this test just confirms the current behavior; that behavior is not
3456 # necessarily the best possible behavior. With 'decode' it is
3457 # returning the raw bytes, so that test should be of correct behavior,
3458 # or at least produce the same result that email4 did.
3459 m = self.bodytest_msg.format(charset='utf-8',
3460 cte='quoted-printable',
3461 bodyline='p=C3=B6stál').encode('utf-8')
3462 msg = email.message_from_bytes(m)
3463 self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
3464 self.assertEqual(msg.get_payload(decode=True),
3465 'pöstál\n'.encode('utf-8'))
3466
3467 def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
3468 # This is similar to the previous test, but proves that if the 8bit
3469 # byte is undecodeable in the specified charset, it gets replaced
3470 # by the unicode 'unknown' character. Again, this may or may not
3471 # be the ideal behavior. Note that if decode=False none of the
3472 # decoders will get involved, so this is the only test we need
3473 # for this behavior.
3474 m = self.bodytest_msg.format(charset='ascii',
3475 cte='quoted-printable',
3476 bodyline='p=C3=B6stál').encode('utf-8')
3477 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00003478 self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
R. David Murray96fd54e2010-10-08 15:55:28 +00003479 self.assertEqual(msg.get_payload(decode=True),
3480 'pöstál\n'.encode('utf-8'))
3481
R David Murray80e0aee2012-05-27 21:23:34 -04003482 # test_defect_handling:test_invalid_chars_in_base64_payload
R. David Murray96fd54e2010-10-08 15:55:28 +00003483 def test_8bit_in_base64_body(self):
R David Murray80e0aee2012-05-27 21:23:34 -04003484 # If we get 8bit bytes in a base64 body, we can just ignore them
3485 # as being outside the base64 alphabet and decode anyway. But
3486 # we register a defect.
R. David Murray96fd54e2010-10-08 15:55:28 +00003487 m = self.bodytest_msg.format(charset='utf-8',
3488 cte='base64',
3489 bodyline='cMO2c3RhbAá=').encode('utf-8')
3490 msg = email.message_from_bytes(m)
3491 self.assertEqual(msg.get_payload(decode=True),
R David Murray80e0aee2012-05-27 21:23:34 -04003492 'pöstal'.encode('utf-8'))
3493 self.assertIsInstance(msg.defects[0],
3494 errors.InvalidBase64CharactersDefect)
R. David Murray96fd54e2010-10-08 15:55:28 +00003495
3496 def test_8bit_in_uuencode_body(self):
3497 # Sticking an 8bit byte in a uuencode block makes it undecodable by
3498 # normal means, so the block is returned undecoded, but as bytes.
3499 m = self.bodytest_msg.format(charset='utf-8',
3500 cte='uuencode',
3501 bodyline='<,.V<W1A; á ').encode('utf-8')
3502 msg = email.message_from_bytes(m)
3503 self.assertEqual(msg.get_payload(decode=True),
3504 '<,.V<W1A; á \n'.encode('utf-8'))
3505
3506
R. David Murray92532142011-01-07 23:25:30 +00003507 headertest_headers = (
3508 ('From: foo@bar.com', ('From', 'foo@bar.com')),
3509 ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
3510 ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
3511 '\tJean de Baddie',
3512 ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
3513 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
3514 ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
3515 ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
3516 )
3517 headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) +
3518 '\nYes, they are flying.\n').encode('utf-8')
R. David Murray96fd54e2010-10-08 15:55:28 +00003519
3520 def test_get_8bit_header(self):
3521 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003522 self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz')
3523 self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz')
R. David Murray96fd54e2010-10-08 15:55:28 +00003524
3525 def test_print_8bit_headers(self):
3526 msg = email.message_from_bytes(self.headertest_msg)
3527 self.assertEqual(str(msg),
R. David Murray92532142011-01-07 23:25:30 +00003528 textwrap.dedent("""\
3529 From: {}
3530 To: {}
3531 Subject: {}
3532 From: {}
3533
3534 Yes, they are flying.
3535 """).format(*[expected[1] for (_, expected) in
3536 self.headertest_headers]))
R. David Murray96fd54e2010-10-08 15:55:28 +00003537
3538 def test_values_with_8bit_headers(self):
3539 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003540 self.assertListEqual([str(x) for x in msg.values()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003541 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003542 'b\uFFFD\uFFFDz',
3543 'Maintenant je vous pr\uFFFD\uFFFDsente mon '
3544 'coll\uFFFD\uFFFDgue, le pouf '
3545 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
R. David Murray96fd54e2010-10-08 15:55:28 +00003546 '\tJean de Baddie',
R. David Murray92532142011-01-07 23:25:30 +00003547 "g\uFFFD\uFFFDst"])
R. David Murray96fd54e2010-10-08 15:55:28 +00003548
3549 def test_items_with_8bit_headers(self):
3550 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003551 self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003552 [('From', 'foo@bar.com'),
R. David Murray92532142011-01-07 23:25:30 +00003553 ('To', 'b\uFFFD\uFFFDz'),
3554 ('Subject', 'Maintenant je vous '
3555 'pr\uFFFD\uFFFDsente '
3556 'mon coll\uFFFD\uFFFDgue, le pouf '
3557 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
3558 '\tJean de Baddie'),
3559 ('From', 'g\uFFFD\uFFFDst')])
R. David Murray96fd54e2010-10-08 15:55:28 +00003560
3561 def test_get_all_with_8bit_headers(self):
3562 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003563 self.assertListEqual([str(x) for x in msg.get_all('from')],
R. David Murray96fd54e2010-10-08 15:55:28 +00003564 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003565 'g\uFFFD\uFFFDst'])
R. David Murray96fd54e2010-10-08 15:55:28 +00003566
R David Murraya2150232011-03-16 21:11:23 -04003567 def test_get_content_type_with_8bit(self):
3568 msg = email.message_from_bytes(textwrap.dedent("""\
3569 Content-Type: text/pl\xA7in; charset=utf-8
3570 """).encode('latin-1'))
3571 self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin")
3572 self.assertEqual(msg.get_content_maintype(), "text")
3573 self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin")
3574
R David Murray97f43c02012-06-24 05:03:27 -04003575 # test_headerregistry.TestContentTypeHeader.non_ascii_in_params
R David Murraya2150232011-03-16 21:11:23 -04003576 def test_get_params_with_8bit(self):
3577 msg = email.message_from_bytes(
3578 'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1'))
3579 self.assertEqual(msg.get_params(header='x-header'),
3580 [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])
3581 self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne')
3582 # XXX: someday you might be able to get 'b\xa7r', for now you can't.
3583 self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None)
3584
R David Murray97f43c02012-06-24 05:03:27 -04003585 # test_headerregistry.TestContentTypeHeader.non_ascii_in_rfc2231_value
R David Murraya2150232011-03-16 21:11:23 -04003586 def test_get_rfc2231_params_with_8bit(self):
3587 msg = email.message_from_bytes(textwrap.dedent("""\
3588 Content-Type: text/plain; charset=us-ascii;
3589 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3590 ).encode('latin-1'))
3591 self.assertEqual(msg.get_param('title'),
3592 ('us-ascii', 'en', 'This is not f\uFFFDn'))
3593
3594 def test_set_rfc2231_params_with_8bit(self):
3595 msg = email.message_from_bytes(textwrap.dedent("""\
3596 Content-Type: text/plain; charset=us-ascii;
3597 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3598 ).encode('latin-1'))
3599 msg.set_param('title', 'test')
3600 self.assertEqual(msg.get_param('title'), 'test')
3601
3602 def test_del_rfc2231_params_with_8bit(self):
3603 msg = email.message_from_bytes(textwrap.dedent("""\
3604 Content-Type: text/plain; charset=us-ascii;
3605 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3606 ).encode('latin-1'))
3607 msg.del_param('title')
3608 self.assertEqual(msg.get_param('title'), None)
3609 self.assertEqual(msg.get_content_maintype(), 'text')
3610
3611 def test_get_payload_with_8bit_cte_header(self):
3612 msg = email.message_from_bytes(textwrap.dedent("""\
3613 Content-Transfer-Encoding: b\xa7se64
3614 Content-Type: text/plain; charset=latin-1
3615
3616 payload
3617 """).encode('latin-1'))
3618 self.assertEqual(msg.get_payload(), 'payload\n')
3619 self.assertEqual(msg.get_payload(decode=True), b'payload\n')
3620
R. David Murray96fd54e2010-10-08 15:55:28 +00003621 non_latin_bin_msg = textwrap.dedent("""\
3622 From: foo@bar.com
3623 To: báz
3624 Subject: Maintenant je vous présente mon collègue, le pouf célèbre
3625 \tJean de Baddie
3626 Mime-Version: 1.0
3627 Content-Type: text/plain; charset="utf-8"
3628 Content-Transfer-Encoding: 8bit
3629
3630 Да, они летят.
3631 """).encode('utf-8')
3632
3633 def test_bytes_generator(self):
3634 msg = email.message_from_bytes(self.non_latin_bin_msg)
3635 out = BytesIO()
3636 email.generator.BytesGenerator(out).flatten(msg)
3637 self.assertEqual(out.getvalue(), self.non_latin_bin_msg)
3638
R. David Murray7372a072011-01-26 21:21:32 +00003639 def test_bytes_generator_handles_None_body(self):
3640 #Issue 11019
3641 msg = email.message.Message()
3642 out = BytesIO()
3643 email.generator.BytesGenerator(out).flatten(msg)
3644 self.assertEqual(out.getvalue(), b"\n")
3645
R. David Murray92532142011-01-07 23:25:30 +00003646 non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\
R. David Murray96fd54e2010-10-08 15:55:28 +00003647 From: foo@bar.com
R. David Murray92532142011-01-07 23:25:30 +00003648 To: =?unknown-8bit?q?b=C3=A1z?=
3649 Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=
3650 =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=
3651 =?unknown-8bit?q?_Jean_de_Baddie?=
R. David Murray96fd54e2010-10-08 15:55:28 +00003652 Mime-Version: 1.0
3653 Content-Type: text/plain; charset="utf-8"
3654 Content-Transfer-Encoding: base64
3655
3656 0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
3657 """)
3658
3659 def test_generator_handles_8bit(self):
3660 msg = email.message_from_bytes(self.non_latin_bin_msg)
3661 out = StringIO()
3662 email.generator.Generator(out).flatten(msg)
R. David Murray92532142011-01-07 23:25:30 +00003663 self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped)
R. David Murray96fd54e2010-10-08 15:55:28 +00003664
3665 def test_bytes_generator_with_unix_from(self):
3666 # The unixfrom contains a current date, so we can't check it
3667 # literally. Just make sure the first word is 'From' and the
3668 # rest of the message matches the input.
3669 msg = email.message_from_bytes(self.non_latin_bin_msg)
3670 out = BytesIO()
3671 email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
3672 lines = out.getvalue().split(b'\n')
3673 self.assertEqual(lines[0].split()[0], b'From')
3674 self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)
3675
R. David Murray92532142011-01-07 23:25:30 +00003676 non_latin_bin_msg_as7bit = non_latin_bin_msg_as7bit_wrapped.split('\n')
3677 non_latin_bin_msg_as7bit[2:4] = [
3678 'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
3679 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
3680 non_latin_bin_msg_as7bit = '\n'.join(non_latin_bin_msg_as7bit)
3681
R. David Murray96fd54e2010-10-08 15:55:28 +00003682 def test_message_from_binary_file(self):
3683 fn = 'test.msg'
3684 self.addCleanup(unlink, fn)
3685 with open(fn, 'wb') as testfile:
3686 testfile.write(self.non_latin_bin_msg)
Brett Cannon384917a2010-10-29 23:08:36 +00003687 with open(fn, 'rb') as testfile:
3688 m = email.parser.BytesParser().parse(testfile)
R. David Murray96fd54e2010-10-08 15:55:28 +00003689 self.assertEqual(str(m), self.non_latin_bin_msg_as7bit)
3690
3691 latin_bin_msg = textwrap.dedent("""\
3692 From: foo@bar.com
3693 To: Dinsdale
3694 Subject: Nudge nudge, wink, wink
3695 Mime-Version: 1.0
3696 Content-Type: text/plain; charset="latin-1"
3697 Content-Transfer-Encoding: 8bit
3698
3699 oh là là, know what I mean, know what I mean?
3700 """).encode('latin-1')
3701
3702 latin_bin_msg_as7bit = textwrap.dedent("""\
3703 From: foo@bar.com
3704 To: Dinsdale
3705 Subject: Nudge nudge, wink, wink
3706 Mime-Version: 1.0
3707 Content-Type: text/plain; charset="iso-8859-1"
3708 Content-Transfer-Encoding: quoted-printable
3709
3710 oh l=E0 l=E0, know what I mean, know what I mean?
3711 """)
3712
3713 def test_string_generator_reencodes_to_quopri_when_appropriate(self):
3714 m = email.message_from_bytes(self.latin_bin_msg)
3715 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3716
3717 def test_decoded_generator_emits_unicode_body(self):
3718 m = email.message_from_bytes(self.latin_bin_msg)
3719 out = StringIO()
3720 email.generator.DecodedGenerator(out).flatten(m)
3721 #DecodedHeader output contains an extra blank line compared
3722 #to the input message. RDM: not sure if this is a bug or not,
3723 #but it is not specific to the 8bit->7bit conversion.
3724 self.assertEqual(out.getvalue(),
3725 self.latin_bin_msg.decode('latin-1')+'\n')
3726
3727 def test_bytes_feedparser(self):
3728 bfp = email.feedparser.BytesFeedParser()
3729 for i in range(0, len(self.latin_bin_msg), 10):
3730 bfp.feed(self.latin_bin_msg[i:i+10])
3731 m = bfp.close()
3732 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3733
R. David Murray8451c4b2010-10-23 22:19:56 +00003734 def test_crlf_flatten(self):
3735 with openfile('msg_26.txt', 'rb') as fp:
3736 text = fp.read()
3737 msg = email.message_from_bytes(text)
3738 s = BytesIO()
3739 g = email.generator.BytesGenerator(s)
3740 g.flatten(msg, linesep='\r\n')
3741 self.assertEqual(s.getvalue(), text)
R David Murrayc5c14722011-04-06 08:13:02 -04003742
3743 def test_8bit_multipart(self):
3744 # Issue 11605
3745 source = textwrap.dedent("""\
3746 Date: Fri, 18 Mar 2011 17:15:43 +0100
3747 To: foo@example.com
3748 From: foodwatch-Newsletter <bar@example.com>
3749 Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System
3750 Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain>
3751 MIME-Version: 1.0
3752 Content-Type: multipart/alternative;
3753 boundary="b1_76a486bee62b0d200f33dc2ca08220ad"
3754
3755 --b1_76a486bee62b0d200f33dc2ca08220ad
3756 Content-Type: text/plain; charset="utf-8"
3757 Content-Transfer-Encoding: 8bit
3758
3759 Guten Tag, ,
3760
3761 mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die
3762 Nachrichten aus Japan.
3763
3764
3765 --b1_76a486bee62b0d200f33dc2ca08220ad
3766 Content-Type: text/html; charset="utf-8"
3767 Content-Transfer-Encoding: 8bit
3768
3769 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
3770 "http://www.w3.org/TR/html4/loose.dtd">
3771 <html lang="de">
3772 <head>
3773 <title>foodwatch - Newsletter</title>
3774 </head>
3775 <body>
3776 <p>mit gro&szlig;er Betroffenheit verfolgen auch wir im foodwatch-Team
3777 die Nachrichten aus Japan.</p>
3778 </body>
3779 </html>
3780 --b1_76a486bee62b0d200f33dc2ca08220ad--
3781
3782 """).encode('utf-8')
3783 msg = email.message_from_bytes(source)
3784 s = BytesIO()
3785 g = email.generator.BytesGenerator(s)
3786 g.flatten(msg)
3787 self.assertEqual(s.getvalue(), source)
3788
R David Murray9fd170e2012-03-14 14:05:03 -04003789 def test_bytes_generator_b_encoding_linesep(self):
3790 # Issue 14062: b encoding was tacking on an extra \n.
3791 m = Message()
3792 # This has enough non-ascii that it should always end up b encoded.
3793 m['Subject'] = Header('žluťoučký kůň')
3794 s = BytesIO()
3795 g = email.generator.BytesGenerator(s)
3796 g.flatten(m, linesep='\r\n')
3797 self.assertEqual(
3798 s.getvalue(),
3799 b'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
3800
3801 def test_generator_b_encoding_linesep(self):
3802 # Since this broke in ByteGenerator, test Generator for completeness.
3803 m = Message()
3804 # This has enough non-ascii that it should always end up b encoded.
3805 m['Subject'] = Header('žluťoučký kůň')
3806 s = StringIO()
3807 g = email.generator.Generator(s)
3808 g.flatten(m, linesep='\r\n')
3809 self.assertEqual(
3810 s.getvalue(),
3811 'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
3812
R. David Murray8451c4b2010-10-23 22:19:56 +00003813 maxDiff = None
3814
Ezio Melottib3aedd42010-11-20 19:04:17 +00003815
class BaseTestBytesGeneratorIdempotent:
    # Mixin supplying bytes-based _msgobj() and _idempotent() helpers.
    # Classes using it must define three attributes that the helpers read:
    # linesep (str), blinesep (bytes) and normalize_linesep_regex
    # (a compiled bytes pattern).

    maxDiff = None

    def _msgobj(self, filename):
        # Read the test file as bytes, rewrite every match of
        # normalize_linesep_regex to blinesep, and return both the parsed
        # message and the normalized bytes it was parsed from.
        with openfile(filename, 'rb') as fp:
            raw = fp.read()
        normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(normalized), normalized

    def _idempotent(self, msg, data, unixfrom=False):
        # Flatten msg with header wrapping disabled (maxheaderlen=0) using
        # this class's linesep, and require the output to equal data.
        out = BytesIO()
        generator = email.generator.BytesGenerator(out, maxheaderlen=0)
        generator.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertEqual(data, out.getvalue())
R. David Murray96fd54e2010-10-08 15:55:28 +00003832
3833
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                     TestIdempotent):
    # Variant that uses a bare LF as the line separator.
    blinesep = b'\n'
    linesep = '\n'
    # Matches CRLF pairs; the mixin's _msgobj() replaces them with blinesep.
    normalize_linesep_regex = re.compile(br'\r\n')
3839
3840
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    # Variant that uses CRLF as the line separator.
    blinesep = b'\r\n'
    linesep = '\r\n'
    # Matches an LF that is not already preceded by CR; the mixin's
    # _msgobj() replaces each such LF with blinesep.
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
3846
Ezio Melottib3aedd42010-11-20 19:04:17 +00003847
class TestBase64(unittest.TestCase):
    # Tests for the helper functions in email.base64mime.

    def test_len(self):
        check = self.assertEqual
        encoded = base64mime.body_encode(b'hello', eol='')
        check(base64mime.header_length('hello'), len(encoded))
        for size in range(15):
            # Every started group of three input characters costs four
            # characters of base64 output (0 -> 0, 1..3 -> 4, 4..6 -> 8, ...).
            expected = 4 * ((size + 2) // 3)
            check(base64mime.header_length('x' * size), expected)

    def test_decode(self):
        cases = (
            ('', b''),
            ('aGVsbG8=', b'hello'),
        )
        for encoded, decoded in cases:
            self.assertEqual(base64mime.decode(encoded), decoded)

    def test_encode(self):
        check = self.assertEqual
        encode = base64mime.body_encode
        check(encode(b''), b'')
        check(encode(b'hello'), 'aGVsbG8=\n')
        # Test the binary flag
        check(encode(b'hello\n'), 'aGVsbG8K\n')
        # Expected wrapping of b'xxxx ' * 20 at maxlinelen=40: three full
        # 40-character lines followed by a short one.
        full_line = 'eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg'
        last_line = 'eHh4eCB4eHh4IA=='
        wrapped = [full_line] * 3 + [last_line]
        # Test the maxlinelen arg
        check(encode(b'xxxx ' * 20, maxlinelen=40),
              ''.join(line + '\n' for line in wrapped))
        # Test the eol argument
        check(encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'),
              ''.join(line + '\r\n' for line in wrapped))

    def test_header_encode(self):
        check = self.assertEqual
        encode = base64mime.header_encode
        default_charset_cases = (
            ('hello', '=?iso-8859-1?b?aGVsbG8=?='),
            ('hello\r\nworld', '=?iso-8859-1?b?aGVsbG8NCndvcmxk?='),
            ('hello\nworld', '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?='),
        )
        for text, expected in default_charset_cases:
            check(encode(text), expected)
        # Test the charset option
        check(encode('hello', charset='iso-8859-2'),
              '=?iso-8859-2?b?aGVsbG8=?=')
        check(encode('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003898
3899
Ezio Melottib3aedd42010-11-20 19:04:17 +00003900
class TestQuopri(unittest.TestCase):
    # Tests for the quoted-printable helpers in email.quoprimime.

    def setUp(self):
        # Set of characters (as byte integers) that don't need to be encoded
        # in headers.
        self.hlit = list(chain(
            range(ord('a'), ord('z') + 1),
            range(ord('A'), ord('Z') + 1),
            range(ord('0'), ord('9') + 1),
            b'!*+-/'))
        # Set of characters (as byte integers) that do need to be encoded in
        # headers.
        self.hnon = [c for c in range(256) if c not in self.hlit]
        # Use a unittest assertion rather than a bare assert so the sanity
        # check still runs under "python -O".
        self.assertEqual(len(self.hlit) + len(self.hnon), 256)
        # Set of characters (as byte integers) that don't need to be encoded
        # in bodies.
        self.blit = list(range(ord(' '), ord('~') + 1))
        self.blit.append(ord('\t'))
        self.blit.remove(ord('='))
        # Set of characters (as byte integers) that do need to be encoded in
        # bodies.
        self.bnon = [c for c in range(256) if c not in self.blit]
        self.assertEqual(len(self.blit) + len(self.bnon), 256)

    def test_quopri_header_check(self):
        for c in self.hlit:
            self.assertFalse(quoprimime.header_check(c),
                        'Should not be header quopri encoded: %s' % chr(c))
        for c in self.hnon:
            self.assertTrue(quoprimime.header_check(c),
                            'Should be header quopri encoded: %s' % chr(c))

    def test_quopri_body_check(self):
        for c in self.blit:
            self.assertFalse(quoprimime.body_check(c),
                        'Should not be body quopri encoded: %s' % chr(c))
        for c in self.bnon:
            self.assertTrue(quoprimime.body_check(c),
                            'Should be body quopri encoded: %s' % chr(c))

    def test_header_quopri_len(self):
        eq = self.assertEqual
        # "=?xxx?q?" plus "?=" is the RFC 2047 chrome: 10 extra characters
        # that header_length() does not include.
        chrome = 10
        eq(quoprimime.header_length(b'hello'), 5)
        eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
           quoprimime.header_length(b'hello') + chrome)
        eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
        eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
           quoprimime.header_length(b'h@e@l@l@o@') + chrome)
        for c in self.hlit:
            eq(quoprimime.header_length(bytes([c])), 1,
               'expected length 1 for %r' % chr(c))
        for c in self.hnon:
            # Space is special; it's encoded to _
            if c == ord(' '):
                continue
            eq(quoprimime.header_length(bytes([c])), 3,
               'expected length 3 for %r' % chr(c))
        eq(quoprimime.header_length(b' '), 1)

    def test_body_quopri_len(self):
        eq = self.assertEqual
        for c in self.blit:
            eq(quoprimime.body_length(bytes([c])), 1)
        for c in self.bnon:
            eq(quoprimime.body_length(bytes([c])), 3)

    def test_quote_unquote_idempotent(self):
        for x in range(256):
            c = chr(x)
            self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)

    def _test_header_encode(self, header, expected_encoded_header, charset=None):
        # Encode header (bytes) and compare with the expected string.  The
        # charset argument is only forwarded when given, so that
        # header_encode's own default is exercised otherwise.
        if charset is None:
            encoded_header = quoprimime.header_encode(header)
        else:
            encoded_header = quoprimime.header_encode(header, charset)
        self.assertEqual(encoded_header, expected_encoded_header)

    def test_header_encode_null(self):
        self._test_header_encode(b'', '')

    def test_header_encode_one_word(self):
        self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=')

    def test_header_encode_two_lines(self):
        self._test_header_encode(b'hello\nworld',
                                 '=?iso-8859-1?q?hello=0Aworld?=')

    def test_header_encode_non_ascii(self):
        self._test_header_encode(b'hello\xc7there',
                                 '=?iso-8859-1?q?hello=C7there?=')

    def test_header_encode_alt_charset(self):
        self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
                                 charset='iso-8859-2')

    def _test_header_decode(self, encoded_header, expected_decoded_header):
        # Decode a Q-encoded header payload and compare with the expectation.
        decoded_header = quoprimime.header_decode(encoded_header)
        self.assertEqual(decoded_header, expected_decoded_header)

    def test_header_decode_null(self):
        self._test_header_decode('', '')

    def test_header_decode_one_word(self):
        self._test_header_decode('hello', 'hello')

    def test_header_decode_two_lines(self):
        self._test_header_decode('hello=0Aworld', 'hello\nworld')

    def test_header_decode_non_ascii(self):
        self._test_header_decode('hello=C7there', 'hello\xc7there')

    def _test_decode(self, encoded, expected_decoded, eol=None):
        # Decode a quoted-printable body and compare with the expectation.
        # eol is only forwarded when given, so that decode's own default is
        # exercised otherwise.
        if eol is None:
            decoded = quoprimime.decode(encoded)
        else:
            decoded = quoprimime.decode(encoded, eol=eol)
        self.assertEqual(decoded, expected_decoded)

    def test_decode_null_word(self):
        self._test_decode('', '')

    def test_decode_null_line_null_word(self):
        self._test_decode('\r\n', '\n')

    def test_decode_one_word(self):
        self._test_decode('hello', 'hello')

    def test_decode_one_word_eol(self):
        self._test_decode('hello', 'hello', eol='X')

    def test_decode_one_line(self):
        self._test_decode('hello\r\n', 'hello\n')

    def test_decode_one_line_lf(self):
        self._test_decode('hello\n', 'hello\n')

    def test_decode_one_line_cr(self):
        self._test_decode('hello\r', 'hello\n')

    def test_decode_one_line_nl(self):
        self._test_decode('hello\n', 'helloX', eol='X')

    def test_decode_one_line_crnl(self):
        self._test_decode('hello\r\n', 'helloX', eol='X')

    def test_decode_one_line_one_word(self):
        self._test_decode('hello\r\nworld', 'hello\nworld')

    def test_decode_one_line_one_word_eol(self):
        self._test_decode('hello\r\nworld', 'helloXworld', eol='X')

    def test_decode_two_lines(self):
        self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n')

    def test_decode_two_lines_eol(self):
        self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X')

    def test_decode_one_long_line(self):
        self._test_decode('Spam' * 250, 'Spam' * 250)

    def test_decode_one_space(self):
        self._test_decode(' ', '')

    def test_decode_multiple_spaces(self):
        self._test_decode(' ' * 5, '')

    def test_decode_one_line_trailing_spaces(self):
        self._test_decode('hello \r\n', 'hello\n')

    def test_decode_two_lines_trailing_spaces(self):
        self._test_decode('hello \r\nworld \r\n', 'hello\nworld\n')

    def test_decode_quoted_word(self):
        self._test_decode('=22quoted=20words=22', '"quoted words"')

    def test_decode_uppercase_quoting(self):
        self._test_decode('ab=CD=EF', 'ab\xcd\xef')

    def test_decode_lowercase_quoting(self):
        self._test_decode('ab=cd=ef', 'ab\xcd\xef')

    def test_decode_soft_line_break(self):
        self._test_decode('soft line=\r\nbreak', 'soft linebreak')

    def test_decode_false_quoting(self):
        self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2')

    def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
        # Encode body and compare with the expectation.  maxlinelen and eol
        # are only forwarded when given; otherwise the local copies are set
        # to body_encode's defaults so the line-length check below knows
        # what limit and separator were in effect.
        kwargs = {}
        if maxlinelen is None:
            # Use body_encode's default.
            maxlinelen = 76
        else:
            kwargs['maxlinelen'] = maxlinelen
        if eol is None:
            # Use body_encode's default.
            eol = '\n'
        else:
            kwargs['eol'] = eol
        encoded_body = quoprimime.body_encode(body, **kwargs)
        self.assertEqual(encoded_body, expected_encoded_body)
        if eol in ('\n', '\r\n'):
            # We know how to split the result back into lines, so maxlinelen
            # can be checked.
            for line in encoded_body.splitlines():
                self.assertLessEqual(len(line), maxlinelen)

    def test_encode_null(self):
        self._test_encode('', '')

    def test_encode_null_lines(self):
        self._test_encode('\n\n', '\n\n')

    def test_encode_one_line(self):
        self._test_encode('hello\n', 'hello\n')

    def test_encode_one_line_crlf(self):
        self._test_encode('hello\r\n', 'hello\n')

    def test_encode_one_line_eol(self):
        self._test_encode('hello\n', 'hello\r\n', eol='\r\n')

    def test_encode_one_space(self):
        self._test_encode(' ', '=20')

    def test_encode_one_line_one_space(self):
        self._test_encode(' \n', '=20\n')

    # XXX: body_encode() expect strings, but uses ord(char) from these strings
    # to index into a 256-entry list.  For code points above 255, this will
    # fail.  Should there be a check for 8-bit only ord() values in body, or
    # at least a comment about the expected input?

    def test_encode_two_lines_one_space(self):
        self._test_encode(' \n \n', '=20\n=20\n')

    def test_encode_one_word_trailing_spaces(self):
        # Only the last trailing space is quoted; the one before it stays
        # literal, so two input spaces give ' =20'.
        self._test_encode('hello  ', 'hello =20')

    def test_encode_one_line_trailing_spaces(self):
        # Same as above, with the line terminator preserved after the
        # quoted space.
        self._test_encode('hello  \n', 'hello =20\n')

    def test_encode_one_word_trailing_tab(self):
        self._test_encode('hello \t', 'hello =09')

    def test_encode_one_line_trailing_tab(self):
        self._test_encode('hello \t\n', 'hello =09\n')

    def test_encode_trailing_space_before_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6)

    def test_encode_trailing_space_at_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5)

    def test_encode_trailing_space_beyond_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4)

    def test_encode_whitespace_lines(self):
        self._test_encode(' \n' * 5, '=20\n' * 5)

    def test_encode_quoted_equals(self):
        self._test_encode('a = b', 'a =3D b')

    def test_encode_one_long_string(self):
        self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25)

    # NOTE: this method used to be defined twice with identical bodies; the
    # second definition silently replaced the first, so only one is kept.
    def test_encode_one_long_line(self):
        self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')

    def test_encode_one_very_long_line(self):
        self._test_encode('x' * 200 + '\n',
                          2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n')

    def test_encode_shortest_maxlinelen(self):
        self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4)

    def test_encode_maxlinelen_too_small(self):
        with self.assertRaises(ValueError):
            self._test_encode('', '', maxlinelen=3)

    def test_encode(self):
        eq = self.assertEqual
        eq(quoprimime.body_encode(''), '')
        eq(quoprimime.body_encode('hello'), 'hello')
        # A CRLF in the input comes out as the default eol.
        eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
        # Expected wrapping of 'xxxx ' * 20 at maxlinelen=40: soft line
        # breaks after 39 characters and a quoted trailing space at the end.
        wrapped = [
            'xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=',
            ' xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=',
            'x xxxx xxxx xxxx xxxx=20',
        ]
        # Test the maxlinelen arg
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40),
           '\n'.join(wrapped))
        # Test the eol argument
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           '\r\n'.join(wrapped))
        # An empty line between two text lines is preserved.
        eq(quoprimime.body_encode('one line\n\ntwo line'),
           'one line\n\ntwo line')
4213
4214
Ezio Melottib3aedd42010-11-20 19:04:17 +00004215
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004216# Test the Charset class
class TestCharset(unittest.TestCase):
    def tearDown(self):
        # test_body_encode registers a temporary 'fake' charset; drop it
        # again if it is present so it cannot affect other tests.
        from email import charset as CharsetModule
        CharsetModule.CHARSETS.pop('fake', None)

    def test_codec_encodeable(self):
        check = self.assertEqual
        # Make sure us-ascii = no Unicode conversion
        ascii_charset = Charset('us-ascii')
        check(ascii_charset.header_encode('Hello World!'), 'Hello World!')
        # Test 8-bit idempotency with us-ascii
        eight_bit = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
        with self.assertRaises(UnicodeError):
            ascii_charset.header_encode(eight_bit)
        utf8_charset = Charset('utf-8')
        check(utf8_charset.header_encode(eight_bit),
              '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')

    def test_body_encode(self):
        check = self.assertEqual
        # Try a charset with QP body encoding
        latin1 = Charset('iso-8859-1')
        check('hello w=F6rld', latin1.body_encode('hello w\xf6rld'))
        # Try a charset with Base64 body encoding
        utf8 = Charset('utf-8')
        check('aGVsbG8gd29ybGQ=\n', utf8.body_encode(b'hello world'))
        # Try a charset with None body encoding
        ascii_charset = Charset('us-ascii')
        check('hello world', ascii_charset.body_encode('hello world'))
        # Try the convert argument, where input codec != output codec
        c = Charset('euc-jp')
        # With apologies to Tokio Kikuchi ;)
        # XXX FIXME
##         try:
##             eq('\x1b$B5FCO;~IW\x1b(B',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
##             eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
##         except LookupError:
##             # We probably don't have the Japanese codecs installed
##             pass
        # Testing SF bug #625509, which we have to fake, since there are no
        # built-in encodings where the header encoding is QP but the body
        # encoding is not.
        from email import charset as CharsetModule
        CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8')
        fake = Charset('fake')
        check('hello world', fake.body_encode('hello world'))

    def test_unicode_charset_name(self):
        # A plain ASCII name round-trips through str(); a name containing a
        # non-ASCII character is rejected with CharsetError.
        ascii_charset = Charset('us-ascii')
        self.assertEqual(str(ascii_charset), 'us-ascii')
        with self.assertRaises(errors.CharsetError):
            Charset('asc\xffii')
4271
4272
Ezio Melottib3aedd42010-11-20 19:04:17 +00004273
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004274# Test multilingual MIME headers.
4275class TestHeader(TestEmailBase):
4276 def test_simple(self):
4277 eq = self.ndiffAssertEqual
4278 h = Header('Hello World!')
4279 eq(h.encode(), 'Hello World!')
4280 h.append(' Goodbye World!')
4281 eq(h.encode(), 'Hello World! Goodbye World!')
4282
4283 def test_simple_surprise(self):
4284 eq = self.ndiffAssertEqual
4285 h = Header('Hello World!')
4286 eq(h.encode(), 'Hello World!')
4287 h.append('Goodbye World!')
4288 eq(h.encode(), 'Hello World! Goodbye World!')
4289
4290 def test_header_needs_no_decoding(self):
4291 h = 'no decoding needed'
4292 self.assertEqual(decode_header(h), [(h, None)])
4293
4294 def test_long(self):
4295 h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
4296 maxlinelen=76)
4297 for l in h.encode(splitchars=' ').split('\n '):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00004298 self.assertTrue(len(l) <= 76)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004299
4300 def test_multilingual(self):
4301 eq = self.ndiffAssertEqual
4302 g = Charset("iso-8859-1")
4303 cz = Charset("iso-8859-2")
4304 utf8 = Charset("utf-8")
4305 g_head = (b'Die Mieter treten hier ein werden mit einem '
4306 b'Foerderband komfortabel den Korridor entlang, '
4307 b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
4308 b'gegen die rotierenden Klingen bef\xf6rdert. ')
4309 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
4310 b'd\xf9vtipu.. ')
4311 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
4312 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
4313 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
4314 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
4315 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
4316 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
4317 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
4318 '\u3044\u307e\u3059\u3002')
4319 h = Header(g_head, g)
4320 h.append(cz_head, cz)
4321 h.append(utf8_head, utf8)
Guido van Rossum9604e662007-08-30 03:46:43 +00004322 enc = h.encode(maxlinelen=76)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004323 eq(enc, """\
Guido van Rossum9604e662007-08-30 03:46:43 +00004324=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
4325 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
4326 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
4327 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004328 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
4329 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
4330 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
4331 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
Guido van Rossum9604e662007-08-30 03:46:43 +00004332 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
4333 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
4334 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
4335 decoded = decode_header(enc)
4336 eq(len(decoded), 3)
4337 eq(decoded[0], (g_head, 'iso-8859-1'))
4338 eq(decoded[1], (cz_head, 'iso-8859-2'))
4339 eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004340 ustr = str(h)
Guido van Rossum9604e662007-08-30 03:46:43 +00004341 eq(ustr,
4342 (b'Die Mieter treten hier ein werden mit einem Foerderband '
4343 b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
4344 b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
4345 b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
4346 b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
4347 b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
4348 b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
4349 b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
4350 b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
4351 b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
4352 b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
4353 b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
4354 b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
4355 b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
4356 b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
4357 b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
4358 ).decode('utf-8'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004359 # Test make_header()
4360 newh = make_header(decode_header(enc))
Guido van Rossum9604e662007-08-30 03:46:43 +00004361 eq(newh, h)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004362
4363 def test_empty_header_encode(self):
4364 h = Header()
4365 self.assertEqual(h.encode(), '')
Barry Warsaw8b3d6592007-08-30 02:10:49 +00004366
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004367 def test_header_ctor_default_args(self):
4368 eq = self.ndiffAssertEqual
4369 h = Header()
4370 eq(h, '')
4371 h.append('foo', Charset('iso-8859-1'))
Guido van Rossum9604e662007-08-30 03:46:43 +00004372 eq(h, 'foo')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004373
4374 def test_explicit_maxlinelen(self):
4375 eq = self.ndiffAssertEqual
4376 hstr = ('A very long line that must get split to something other '
4377 'than at the 76th character boundary to test the non-default '
4378 'behavior')
4379 h = Header(hstr)
4380 eq(h.encode(), '''\
4381A very long line that must get split to something other than at the 76th
4382 character boundary to test the non-default behavior''')
4383 eq(str(h), hstr)
4384 h = Header(hstr, header_name='Subject')
4385 eq(h.encode(), '''\
4386A very long line that must get split to something other than at the
4387 76th character boundary to test the non-default behavior''')
4388 eq(str(h), hstr)
4389 h = Header(hstr, maxlinelen=1024, header_name='Subject')
4390 eq(h.encode(), hstr)
4391 eq(str(h), hstr)
4392
Guido van Rossum9604e662007-08-30 03:46:43 +00004393 def test_quopri_splittable(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004394 eq = self.ndiffAssertEqual
4395 h = Header(charset='iso-8859-1', maxlinelen=20)
Guido van Rossum9604e662007-08-30 03:46:43 +00004396 x = 'xxxx ' * 20
4397 h.append(x)
4398 s = h.encode()
4399 eq(s, """\
4400=?iso-8859-1?q?xxx?=
4401 =?iso-8859-1?q?x_?=
4402 =?iso-8859-1?q?xx?=
4403 =?iso-8859-1?q?xx?=
4404 =?iso-8859-1?q?_x?=
4405 =?iso-8859-1?q?xx?=
4406 =?iso-8859-1?q?x_?=
4407 =?iso-8859-1?q?xx?=
4408 =?iso-8859-1?q?xx?=
4409 =?iso-8859-1?q?_x?=
4410 =?iso-8859-1?q?xx?=
4411 =?iso-8859-1?q?x_?=
4412 =?iso-8859-1?q?xx?=
4413 =?iso-8859-1?q?xx?=
4414 =?iso-8859-1?q?_x?=
4415 =?iso-8859-1?q?xx?=
4416 =?iso-8859-1?q?x_?=
4417 =?iso-8859-1?q?xx?=
4418 =?iso-8859-1?q?xx?=
4419 =?iso-8859-1?q?_x?=
4420 =?iso-8859-1?q?xx?=
4421 =?iso-8859-1?q?x_?=
4422 =?iso-8859-1?q?xx?=
4423 =?iso-8859-1?q?xx?=
4424 =?iso-8859-1?q?_x?=
4425 =?iso-8859-1?q?xx?=
4426 =?iso-8859-1?q?x_?=
4427 =?iso-8859-1?q?xx?=
4428 =?iso-8859-1?q?xx?=
4429 =?iso-8859-1?q?_x?=
4430 =?iso-8859-1?q?xx?=
4431 =?iso-8859-1?q?x_?=
4432 =?iso-8859-1?q?xx?=
4433 =?iso-8859-1?q?xx?=
4434 =?iso-8859-1?q?_x?=
4435 =?iso-8859-1?q?xx?=
4436 =?iso-8859-1?q?x_?=
4437 =?iso-8859-1?q?xx?=
4438 =?iso-8859-1?q?xx?=
4439 =?iso-8859-1?q?_x?=
4440 =?iso-8859-1?q?xx?=
4441 =?iso-8859-1?q?x_?=
4442 =?iso-8859-1?q?xx?=
4443 =?iso-8859-1?q?xx?=
4444 =?iso-8859-1?q?_x?=
4445 =?iso-8859-1?q?xx?=
4446 =?iso-8859-1?q?x_?=
4447 =?iso-8859-1?q?xx?=
4448 =?iso-8859-1?q?xx?=
4449 =?iso-8859-1?q?_?=""")
4450 eq(x, str(make_header(decode_header(s))))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004451 h = Header(charset='iso-8859-1', maxlinelen=40)
4452 h.append('xxxx ' * 20)
Guido van Rossum9604e662007-08-30 03:46:43 +00004453 s = h.encode()
4454 eq(s, """\
4455=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
4456 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
4457 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
4458 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
4459 =?iso-8859-1?q?_xxxx_xxxx_?=""")
4460 eq(x, str(make_header(decode_header(s))))
4461
def test_base64_splittable(self):
    # koi8-r headers are base64 encoded; encode() must split the payload
    # into encoded words that each fit within maxlinelen, and decoding the
    # folded result must give back the original text.
    eq = self.ndiffAssertEqual
    h = Header(charset='koi8-r', maxlinelen=20)
    x = 'xxxx ' * 20
    h.append(x)
    s = h.encode()
    # With maxlinelen=20 each encoded word carries three input bytes.
    eq(s, """\
=?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IA==?=""")
    eq(x, str(make_header(decode_header(s))))
    # Same text, wider limit: fewer, longer encoded words.
    h = Header(charset='koi8-r', maxlinelen=40)
    h.append(x)
    s = h.encode()
    eq(s, """\
=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
    eq(x, str(make_header(decode_header(s))))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004515
4516 def test_us_ascii_header(self):
4517 eq = self.assertEqual
4518 s = 'hello'
4519 x = decode_header(s)
4520 eq(x, [('hello', None)])
4521 h = make_header(x)
4522 eq(s, h.encode())
4523
4524 def test_string_charset(self):
4525 eq = self.assertEqual
4526 h = Header()
4527 h.append('hello', 'iso-8859-1')
Guido van Rossum9604e662007-08-30 03:46:43 +00004528 eq(h, 'hello')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004529
##    def test_unicode_error(self):
##        raises = self.assertRaises
##        raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
##        h = Header()
##        raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')
4538
4539 def test_utf8_shortest(self):
4540 eq = self.assertEqual
4541 h = Header('p\xf6stal', 'utf-8')
4542 eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
4543 h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
4544 eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')
4545
4546 def test_bad_8bit_header(self):
4547 raises = self.assertRaises
4548 eq = self.assertEqual
4549 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4550 raises(UnicodeError, Header, x)
4551 h = Header()
4552 raises(UnicodeError, h.append, x)
4553 e = x.decode('utf-8', 'replace')
4554 eq(str(Header(x, errors='replace')), e)
4555 h.append(x, errors='replace')
4556 eq(str(h), e)
4557
R David Murray041015c2011-03-25 15:10:55 -04004558 def test_escaped_8bit_header(self):
4559 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
R David Murray6bdb1762011-06-18 12:30:55 -04004560 e = x.decode('ascii', 'surrogateescape')
4561 h = Header(e, charset=email.charset.UNKNOWN8BIT)
R David Murray041015c2011-03-25 15:10:55 -04004562 self.assertEqual(str(h),
4563 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
4564 self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])
4565
R David Murraye5e366c2011-06-18 12:57:28 -04004566 def test_header_handles_binary_unknown8bit(self):
4567 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4568 h = Header(x, charset=email.charset.UNKNOWN8BIT)
4569 self.assertEqual(str(h),
4570 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
4571 self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])
4572
4573 def test_make_header_handles_binary_unknown8bit(self):
4574 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4575 h = Header(x, charset=email.charset.UNKNOWN8BIT)
4576 h2 = email.header.make_header(email.header.decode_header(h))
4577 self.assertEqual(str(h2),
4578 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
4579 self.assertEqual(email.header.decode_header(h2), [(x, 'unknown-8bit')])
4580
R David Murray041015c2011-03-25 15:10:55 -04004581 def test_modify_returned_list_does_not_change_header(self):
4582 h = Header('test')
4583 chunks = email.header.decode_header(h)
4584 chunks.append(('ascii', 'test2'))
4585 self.assertEqual(str(h), 'test')
4586
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004587 def test_encoded_adjacent_nonencoded(self):
4588 eq = self.assertEqual
4589 h = Header()
4590 h.append('hello', 'iso-8859-1')
4591 h.append('world')
4592 s = h.encode()
4593 eq(s, '=?iso-8859-1?q?hello?= world')
4594 h = make_header(decode_header(s))
4595 eq(h.encode(), s)
4596
R David Murray07ea53c2012-06-02 17:56:49 -04004597 def test_whitespace_keeper(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004598 eq = self.assertEqual
4599 s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
4600 parts = decode_header(s)
R David Murray07ea53c2012-06-02 17:56:49 -04004601 eq(parts, [(b'Subject: ', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b' zz.', None)])
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004602 hdr = make_header(parts)
4603 eq(hdr.encode(),
4604 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')
4605
4606 def test_broken_base64_header(self):
4607 raises = self.assertRaises
R. David Murrayc4e69cc2010-08-03 22:14:10 +00004608 s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004609 raises(errors.HeaderParseError, decode_header, s)
4610
R. David Murray477efb32011-01-05 01:39:32 +00004611 def test_shift_jis_charset(self):
4612 h = Header('文', charset='shift_jis')
4613 self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')
4614
R David Murrayde912762011-03-16 18:26:23 -04004615 def test_flatten_header_with_no_value(self):
4616 # Issue 11401 (regression from email 4.x) Note that the space after
4617 # the header doesn't reflect the input, but this is also the way
4618 # email 4.x behaved. At some point it would be nice to fix that.
4619 msg = email.message_from_string("EmptyHeader:")
4620 self.assertEqual(str(msg), "EmptyHeader: \n\n")
4621
R David Murray01581ee2011-04-18 10:04:34 -04004622 def test_encode_preserves_leading_ws_on_value(self):
4623 msg = Message()
4624 msg['SomeHeader'] = ' value with leading ws'
4625 self.assertEqual(str(msg), "SomeHeader: value with leading ws\n\n")
4626
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004627
Ezio Melottib3aedd42010-11-20 19:04:17 +00004628
# Test RFC 2231 header parameters (en/de)coding
class TestRFC2231(TestEmailBase):

    # The "test_headerregistry..." comments name the corresponding tests in
    # test_headerregistry.
    #
    # NOTE(review): the message fixtures below are reproduced as they appear
    # in this copy of the file; runs of blanks inside literals (for example
    # "Fri, 4 May" in the msg_01.txt expectations) may have been collapsed --
    # confirm against the data files before relying on them.

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes
    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes
    def test_get_param(self):
        # An RFC 2231 encoded parameter comes back as a
        # (charset, language, value) triple; unquote=False keeps the quotes.
        eq = self.assertEqual
        msg = self._msgobj('msg_29.txt')
        eq(msg.get_param('title'),
           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        eq(msg.get_param('title', unquote=False),
           ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))

    def test_set_param(self):
        # set_param() with a charset stores an RFC 2231 triple; language
        # defaults to '' and the serialized header uses the name*= form.
        eq = self.ndiffAssertEqual
        msg = Message()
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii')
        eq(msg.get_param('title'),
           ('us-ascii', '', 'This is even more ***fun*** isn\'t it!'))
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        eq(msg.get_param('title'),
           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        msg = self._msgobj('msg_01.txt')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    def test_set_param_requote(self):
        msg = Message()
        msg.set_param('title', 'foo')
        self.assertEqual(msg['content-type'], 'text/plain; title="foo"')
        msg.set_param('title', 'bar', requote=False)
        self.assertEqual(msg['content-type'], 'text/plain; title=bar')
        # tspecial is still quoted.
        msg.set_param('title', "(bar)bell", requote=False)
        self.assertEqual(msg['content-type'], 'text/plain; title="(bar)bell"')

    def test_del_param(self):
        # Deleting one parameter must leave the RFC 2231 encoded one intact.
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        msg.set_param('foo', 'bar', charset='us-ascii', language='en')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        msg.del_param('foo', header='Content-Type')
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_charset
    # I changed the charset name, though, because the one in the file isn't
    # a legal charset name.  Should add a test for an illegal charset.
    def test_rfc2231_get_content_charset(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_32.txt')
        eq(msg.get_content_charset(), 'us-ascii')

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_double_quotes
    def test_rfc2231_parse_rfc_quoting(self):
        # Unquoted encoded segments: the filename is reassembled and the
        # message serializes back to exactly the input text.
        m = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*=''This%20is%20even%20more%20;
            \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        self.assertEqual(m, msg.as_string())

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes
    def test_rfc2231_parse_extra_quoting(self):
        # Same as above, but the encoded segments are wrapped in quotes.
        m = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*="''This%20is%20even%20more%20";
            \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        self.assertEqual(m, msg.as_string())

    # test_headerregistry.TestContentTypeHeader.rfc2231_no_language_or_charset
    # but new test uses *0* because otherwise lang/charset is not valid.
    # test_headerregistry.TestContentTypeHeader.rfc2231_segmented_normal_values
    def test_rfc2231_no_language_or_charset(self):
        # Plain (unencoded) continuation segments yield a str, not a triple.
        m = '''\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm

'''
        msg = email.message_from_string(m)
        param = msg.get_param('NAME')
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(
            param,
            'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_charset
    def test_rfc2231_no_language_or_charset_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    # Duplicate of previous test?
    def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    # test_headerregistry.TestContentTypeHeader.rfc2231_partly_encoded,
    # but the test below is wrong (the first part should be decoded).
    def test_rfc2231_partly_encoded(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20***fun*** is it not.pdf')

    def test_rfc2231_partly_nonencoded(self):
        # No segment is marked as encoded, so the percent escapes stay
        # literal in the reassembled filename.
        m = '''\
Content-Disposition: inline;
\tfilename*0="This%20is%20even%20more%20";
\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_boundary(self):
        m = '''\
Content-Type: multipart/alternative;
\tboundary*0*="''This%20is%20even%20more%20";
\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tboundary*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_boundary(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_charset(self):
        # This is a nonsensical charset value, but tests the code anyway
        m = '''\
Content-Type: text/plain;
\tcharset*0*="This%20is%20even%20more%20";
\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tcharset*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_content_charset(),
                         'this is even more ***fun*** is it not.pdf')

    # test_headerregistry.TestContentTypeHeader.rfc2231_unknown_charset_treated_as_ascii
    def test_rfc2231_bad_encoding_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_charset(self):
        m = """\
Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D

"""
        msg = email.message_from_string(m)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertIsNone(msg.get_content_charset())

    def test_rfc2231_bad_character_in_charset(self):
        m = """\
Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D

"""
        msg = email.message_from_string(m)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertIsNone(msg.get_content_charset())

    def test_rfc2231_bad_character_in_filename(self):
        # The stray %E2 octet is not valid ascii; it is expected to show up
        # as U+FFFD in the decoded filename.
        m = '''\
Content-Disposition: inline;
\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2*="is it not.pdf%E2"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf\ufffd')

    def test_rfc2231_unknown_encoding(self):
        m = """\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt

"""
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(), 'myfile.txt')

    def test_rfc2231_single_tick_in_filename_extended(self):
        # A lone apostrophe in an extended value must not be taken for the
        # charset/language delimiters.
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, None)
        eq(language, None)
        eq(s, "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes
    def test_rfc2231_single_tick_in_filename(self):
        m = """\
Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(m)
        param = msg.get_param('name')
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(param, "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_value_with_charset_and_lang
    def test_rfc2231_tick_attack_extended(self):
        # Charset and language are given, and the value itself contains an
        # apostrophe; only the first two ticks delimit charset/language.
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_non_encoded_value
    def test_rfc2231_tick_attack(self):
        # The segments are not marked as encoded, so the ticks are ordinary
        # characters and the whole text is the value.
        m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(m)
        param = msg.get_param('name')
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(param, "us-ascii'en-us'Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quotes_inside_quotes
    def test_rfc2231_no_extended_values(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo; name=\"Frank's Document\"

"""
        msg = email.message_from_string(m)
        eq(msg.get_param('name'), "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_then_unencoded_segments
    def test_rfc2231_encoded_then_unencoded_segments(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'My\";
\tname*1=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, 'My Document For You')

    # test_headerregistry.TestContentTypeHeader.rfc2231_unencoded_then_encoded_segments
    # test_headerregistry.TestContentTypeHeader.rfc2231_quoted_unencoded_then_encoded_segments
    def test_rfc2231_unencoded_then_encoded_segments(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'My\";
\tname*1*=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, 'My Document For You')
4997
4998
Ezio Melottib3aedd42010-11-20 19:04:17 +00004999
# Tests to ensure that signed parts of an email are completely preserved, as
# required by RFC1847 section 2.1.  Note that these are incomplete, because the
# email package does not currently always preserve the body.  See issue 1670765.
class TestSigned(TestEmailBase):

    # Captures the text between the first pair of identical "--boundary"
    # lines (group 2); group 1 is the boundary string itself.  Compiled once
    # for all tests instead of on every comparison.
    _FIRST_PART_RE = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)

    def _msg_and_obj(self, filename):
        # Return (raw text, parsed message) for the named test data file.
        with openfile(filename) as fp:
            original = fp.read()
        msg = email.message_from_string(original)
        return original, msg

    def _first_part(self, text):
        # Extract the first mime part of a serialized message, failing the
        # test with a readable message when no part can be located.
        match = self._FIRST_PART_RE.search(text)
        self.assertIsNotNone(match, 'no MIME part found in message text')
        return match.group(2)

    def _signed_parts_eq(self, original, result):
        # Extract the first mime part of each message and require that it
        # came through serialization unchanged.
        inpart = self._first_part(original)
        outpart = self._first_part(result)
        self.assertEqual(outpart, inpart)

    def test_long_headers_as_string(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)
5035
5036
Ezio Melottib3aedd42010-11-20 19:04:17 +00005037
# Run the whole module's tests when executed as a script.
if __name__ == '__main__':
    unittest.main()