# Copyright (C) 2001-2010 Python Software Foundation
# Contact: email-sig@python.org
# email package unit tests
4
import re
import time
import base64
import unittest
import textwrap

from io import StringIO, BytesIO
from itertools import chain

import email
import email.policy

from email.charset import Charset
from email.header import Header, decode_header, make_header
from email.parser import Parser, HeaderParser
from email.generator import Generator, DecodedGenerator, BytesGenerator
from email.message import Message
from email.mime.application import MIMEApplication
from email.mime.audio import MIMEAudio
from email.mime.text import MIMEText
from email.mime.image import MIMEImage
from email.mime.base import MIMEBase
from email.mime.message import MIMEMessage
from email.mime.multipart import MIMEMultipart
from email import utils
from email import errors
from email import encoders
from email import iterators
from email import base64mime
from email import quoprimime

from test.support import unlink
from test.test_email import openfile, TestEmailBase

# These imports are documented to work, but we are testing them using a
# different path, so we import them here just to make sure they are importable.
from email.parser import FeedParser, BytesFeedParser
42
# Small string constants shared by the tests in this module.
NL = '\n'
EMPTYSTRING = ''
SPACE = ' '
46
47
# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    def test_get_all(self):
        # get_all() returns the list of values for a header, or the
        # supplied default when the header is not present.
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        eq(msg.get_charset(), None)
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        eq(msg.get_charset(), None)
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        # set_charset() also accepts a plain charset name.
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        # A charset passed to set_payload() is reported by get_charset().
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_get_charsets(self):
        # Expected get_charsets() results for three sample messages.
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        # get_filename() on the subparts of two sample messages.
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_make_boundary(self):
        msg = MIMEMultipart('form-data')
        # Note that when the boundary gets created is an implementation
        # detail and might change.
        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
        # Trigger creation of boundary
        msg.as_string()
        self.assertEqual(msg.items()[0][1][:33],
                         'multipart/form-data; boundary="==')
        # XXX: there ought to be tests of the uniqueness of the boundary, too.

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        with openfile('msg_46.txt') as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, True, 0)
        gen.flatten(msg, False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_byte_message_rfc822_only(self):
        # Make sure new bytes header parser also passes this.
        with openfile('msg_46.txt') as fp:
            msgdata = fp.read().encode('ascii')
        parser = email.parser.BytesHeaderParser()
        msg = parser.parsebytes(msgdata)
        out = BytesIO()
        gen = email.generator.BytesGenerator(out)
        gen.flatten(msg)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        eq(msg.get_payload(decode=True), None)
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_get_payload_n_raises_on_non_multipart(self):
        # Indexing into the payload of a fresh Message raises TypeError.
        msg = Message()
        self.assertRaises(TypeError, msg.get_payload, 1)

    def test_decoded_generator(self):
        # Flattening msg_07 with DecodedGenerator must reproduce msg_17.
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        self.assertIn('from', msg)
        self.assertIn('From', msg)
        self.assertIn('FROM', msg)
        self.assertIn('to', msg)
        self.assertIn('To', msg)
        self.assertIn('TO', msg)

    def test_as_string(self):
        # str(msg) round-trips the file text; with unixfrom=True the output
        # gains one leading 'From ' line and is otherwise unchanged.
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        eq(text, str(msg))
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        self.assertTrue(lines[0].startswith('From '))
        eq(text, NL.join(lines[1:]))

    # test_headerregistry.TestContentTypeHeader.bad_params
    def test_bad_param(self):
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_filename(), None)

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_boundary(), None)

    def test_get_params(self):
        # get_params() on an arbitrary header: valued, valueless and quoted
        # parameters, plus None when the default header is absent.
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        eq(msg.get_params(), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    # test_headerregistry.TestContentTypeHeader.spaces_around_param_equals
    def test_get_param_liberal(self):
        msg = Message()
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        eq(msg.get_param('quuz', header='x-header'), None)
        eq(msg.get_param('quuz'), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    # test_headerregistry.TestContentTypeHeader.spaces_around_semis
    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    # test_headerregistry.TestContentTypeHeader.semis_inside_quotes
    def test_get_param_with_semis_in_quotes(self):
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    # test_headerregistry.TestContentTypeHeader.quotes_inside_rfc2231_value
    def test_get_param_with_quotes(self):
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        # Header-name membership ignores case but requires the full name.
        msg = email.message_from_string('Header: exists')
        self.assertIn('header', msg)
        self.assertIn('Header', msg)
        self.assertIn('HEADER', msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        # Delete a parameter, then set it again: it reappears at the end.
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_del_param_on_nonexistent_header(self):
        msg = Message()
        # Deleting param on empty msg should not raise exception.
        msg.del_param('filename', 'content-disposition')

    def test_del_nonexistent_param(self):
        # Deleting a parameter that is not there leaves the header as is.
        msg = Message()
        msg.add_header('Content-Type', 'text/plain', charset='utf-8')
        existing_header = msg['Content-Type']
        msg.del_param('foobar', header='Content-Type')
        self.assertEqual(msg['Content-Type'], existing_header)

    def test_set_type(self):
        # set_type() rejects a value without a slash and keeps existing
        # parameters when the type is changed.
        eq = self.assertEqual
        msg = Message()
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        # A Content-Type without a slash yields the 'text' maintype.
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        # A Content-Type without a slash yields the 'plain' subtype.
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        # replace_header() keeps header order, touches only the first match
        # of a duplicated header, and raises KeyError for a missing one.
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    # test_defect_handling:test_invalid_chars_in_base64_payload
    def test_broken_base64_payload(self):
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         (b'\x03\x00\xe9\xd0\xfe\xff\xff.\x8b\xc0'
                          b'\xa1\x00p\xf6\xbf\xe9\x0f'))
        self.assertIsInstance(msg.defects[0],
                              errors.InvalidBase64CharactersDefect)

    def test_broken_unicode_payload(self):
        # This test improves coverage but is not a compliance test.
        # The behavior in this situation is currently undefined by the API.
        x = 'this is a br\xf6ken thing to do'
        msg = Message()
        msg['content-type'] = 'text/plain'
        msg['content-transfer-encoding'] = '8bit'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    def test_questionable_bytes_payload(self):
        # This test improves coverage but is not a compliance test,
        # since it involves poking inside the black box.
        x = 'this is a quéstionable thing to do'.encode('utf-8')
        msg = Message()
        msg['content-type'] = 'text/plain; charset="utf-8"'
        msg['content-transfer-encoding'] = '8bit'
        msg._payload = x
        self.assertEqual(msg.get_payload(decode=True), x)

    # Issue 1078919
    def test_ascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    def test_noascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer.ppt")
        self.assertEqual(
            'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
            msg['Content-Disposition'])

    def test_nonascii_add_header_via_triple(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fußballer.ppt'))
        self.assertEqual(
            'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
            msg['Content-Disposition'])

    def test_ascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="windows [filename].ppt")
        self.assertEqual(
            'attachment; filename="windows [filename].ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer [filename].ppt")
        self.assertEqual(
            "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
            msg['Content-Disposition'])

    def test_binary_quopri_payload(self):
        # A bytes payload is QP-decoded the same way for either charset.
        for charset in ('latin-1', 'ascii'):
            msg = Message()
            msg['content-type'] = 'text/plain; charset=%s' % charset
            msg['content-transfer-encoding'] = 'quoted-printable'
            msg.set_payload(b'foo=e6=96=87bar')
            self.assertEqual(
                msg.get_payload(decode=True),
                b'foo\xe6\x96\x87bar',
                'get_payload returns wrong result with charset %s.' % charset)

    def test_binary_base64_payload(self):
        # A bytes payload is base64-decoded the same way for either charset.
        for charset in ('latin-1', 'ascii'):
            msg = Message()
            msg['content-type'] = 'text/plain; charset=%s' % charset
            msg['content-transfer-encoding'] = 'base64'
            msg.set_payload(b'Zm9v5paHYmFy')
            self.assertEqual(
                msg.get_payload(decode=True),
                b'foo\xe6\x96\x87bar',
                'get_payload returns wrong result with charset %s.' % charset)

    def test_binary_uuencode_payload(self):
        # A bytes payload is uu-decoded for every accepted spelling of the
        # transfer encoding, with either charset.
        for charset in ('latin-1', 'ascii'):
            for encoding in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
                msg = Message()
                msg['content-type'] = 'text/plain; charset=%s' % charset
                msg['content-transfer-encoding'] = encoding
                msg.set_payload(b"begin 666 -\n)9F]OYI:'8F%R\n \nend\n")
                # The failure message is one format string; the earlier
                # str((a, b)).format(...) form rendered a tuple repr.
                self.assertEqual(
                    msg.get_payload(decode=True),
                    b'foo\xe6\x96\x87bar',
                    ('get_payload returns wrong result '
                     'with charset {0} and encoding {1}.').format(
                         charset, encoding))

    def test_add_header_with_name_only_param(self):
        # A parameter whose value is None is emitted as a bare name, with
        # the underscore in the keyword turned into a hyphen.
        msg = Message()
        msg.add_header('Content-Disposition', 'inline', foo_bar=None)
        self.assertEqual("inline; foo-bar", msg['Content-Disposition'])

    def test_add_header_with_no_value(self):
        msg = Message()
        msg.add_header('X-Status', None)
        self.assertEqual('', msg['X-Status'])

    # Issue 5871: reject an attempt to embed a header inside a header value
    # (header injection attack).
    def test_embeded_header_via_Header_rejected(self):
        msg = Message()
        msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_embeded_header_via_string_rejected(self):
        msg = Message()
        msg['Dummy'] = 'dummy\nX-Injected-Header: test'
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_unicode_header_defaults_to_utf8_encoding(self):
        # Issue 14291
        m = MIMEText('abc\n')
        m['Subject'] = 'É test'
        self.assertEqual(str(m), textwrap.dedent("""\
            Content-Type: text/plain; charset="us-ascii"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 7bit
            Subject: =?utf-8?q?=C3=89_test?=

            abc
            """))

    def test_unicode_body_defaults_to_utf8_encoding(self):
        # Issue 14291
        m = MIMEText('É testabc\n')
        self.assertEqual(str(m), textwrap.dedent("""\
            Content-Type: text/plain; charset="utf-8"
            MIME-Version: 1.0
            Content-Transfer-Encoding: base64

            w4kgdGVzdGFiYwo=
            """))
676
677
# Test the email.encoders module
class TestEncoders(unittest.TestCase):

    def test_EncodersEncode_base64(self):
        with openfile('PyBanner048.gif', 'rb') as fp:
            bindata = fp.read()
        mimed = email.mime.image.MIMEImage(bindata)
        base64ed = mimed.get_payload()
        # the transfer-encoded body lines should all be <=76 characters
        lines = base64ed.split('\n')
        self.assertLessEqual(max(len(x) for x in lines), 76)

    def test_encode_empty_payload(self):
        # Setting a us-ascii charset on an empty message selects 7bit.
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg['content-transfer-encoding'], '7bit')

    def test_default_cte(self):
        eq = self.assertEqual
        # 7bit data and the default us-ascii _charset
        msg = MIMEText('hello world')
        eq(msg['content-transfer-encoding'], '7bit')
        # Similar, but with 8bit data
        msg = MIMEText('hello \xf8 world')
        eq(msg['content-transfer-encoding'], 'base64')
        # And now with a different charset
        msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')

    def test_encode7or8bit(self):
        # Make sure a charset whose input character set is 8bit but
        # whose output character set is 7bit gets a transfer-encoding
        # of 7bit.
        eq = self.assertEqual
        msg = MIMEText('文', _charset='euc-jp')
        eq(msg['content-transfer-encoding'], '7bit')

    def test_qp_encode_latin1(self):
        # Latin-1 text is emitted as quoted-printable.
        msg = MIMEText('\xe1\xf6\n', 'text', 'ISO-8859-1')
        self.assertEqual(str(msg), textwrap.dedent("""\
            MIME-Version: 1.0
            Content-Type: text/text; charset="iso-8859-1"
            Content-Transfer-Encoding: quoted-printable

            =E1=F6
            """))

    def test_qp_encode_non_latin1(self):
        # Issue 16948
        msg = MIMEText('\u017c\n', 'text', 'ISO-8859-2')
        self.assertEqual(str(msg), textwrap.dedent("""\
            MIME-Version: 1.0
            Content-Type: text/text; charset="iso-8859-2"
            Content-Transfer-Encoding: quoted-printable

            =BF
            """))
736
Ezio Melottib3aedd42010-11-20 19:04:17 +0000737
# Test long header wrapping
739class TestLongHeaders(TestEmailBase):
R David Murray01581ee2011-04-18 10:04:34 -0400740
741 maxDiff = None
742
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000743 def test_split_long_continuation(self):
744 eq = self.ndiffAssertEqual
745 msg = email.message_from_string("""\
746Subject: bug demonstration
747\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
748\tmore text
749
750test
751""")
752 sfp = StringIO()
753 g = Generator(sfp)
754 g.flatten(msg)
755 eq(sfp.getvalue(), """\
756Subject: bug demonstration
757\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
758\tmore text
759
760test
761""")
762
763 def test_another_long_almost_unsplittable_header(self):
764 eq = self.ndiffAssertEqual
765 hstr = """\
766bug demonstration
767\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
768\tmore text"""
769 h = Header(hstr, continuation_ws='\t')
770 eq(h.encode(), """\
771bug demonstration
772\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
773\tmore text""")
774 h = Header(hstr.replace('\t', ' '))
775 eq(h.encode(), """\
776bug demonstration
777 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
778 more text""")
779
780 def test_long_nonstring(self):
781 eq = self.ndiffAssertEqual
782 g = Charset("iso-8859-1")
783 cz = Charset("iso-8859-2")
784 utf8 = Charset("utf-8")
785 g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
786 b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
787 b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
788 b'bef\xf6rdert. ')
789 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
790 b'd\xf9vtipu.. ')
791 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
792 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
793 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
794 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
795 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
796 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
797 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
798 '\u3044\u307e\u3059\u3002')
799 h = Header(g_head, g, header_name='Subject')
800 h.append(cz_head, cz)
801 h.append(utf8_head, utf8)
802 msg = Message()
803 msg['Subject'] = h
804 sfp = StringIO()
805 g = Generator(sfp)
806 g.flatten(msg)
807 eq(sfp.getvalue(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000808Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
809 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
810 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
811 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
812 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
813 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
814 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
815 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
816 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
817 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
818 =?utf-8?b?44CC?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000819
820""")
Guido van Rossum9604e662007-08-30 03:46:43 +0000821 eq(h.encode(maxlinelen=76), """\
822=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
823 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
824 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
825 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
826 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
827 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
828 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
829 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
830 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
831 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
832 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000833
834 def test_long_header_encode(self):
835 eq = self.ndiffAssertEqual
836 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
837 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
838 header_name='X-Foobar-Spoink-Defrobnit')
839 eq(h.encode(), '''\
840wasnipoop; giraffes="very-long-necked-animals";
841 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
842
843 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
844 eq = self.ndiffAssertEqual
845 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
846 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
847 header_name='X-Foobar-Spoink-Defrobnit',
848 continuation_ws='\t')
849 eq(h.encode(), '''\
850wasnipoop; giraffes="very-long-necked-animals";
851 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
852
853 def test_long_header_encode_with_tab_continuation(self):
854 eq = self.ndiffAssertEqual
855 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
856 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
857 header_name='X-Foobar-Spoink-Defrobnit',
858 continuation_ws='\t')
859 eq(h.encode(), '''\
860wasnipoop; giraffes="very-long-necked-animals";
861\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
862
R David Murray3a6152f2011-03-14 21:13:03 -0400863 def test_header_encode_with_different_output_charset(self):
864 h = Header('文', 'euc-jp')
865 self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=")
866
867 def test_long_header_encode_with_different_output_charset(self):
868 h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4'
869 b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4'
870 b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4'
871 b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp')
872 res = """\
873=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?=
874 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?="""
875 self.assertEqual(h.encode(), res)
876
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000877 def test_header_splitter(self):
878 eq = self.ndiffAssertEqual
879 msg = MIMEText('')
880 # It'd be great if we could use add_header() here, but that doesn't
881 # guarantee an order of the parameters.
882 msg['X-Foobar-Spoink-Defrobnit'] = (
883 'wasnipoop; giraffes="very-long-necked-animals"; '
884 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
885 sfp = StringIO()
886 g = Generator(sfp)
887 g.flatten(msg)
888 eq(sfp.getvalue(), '''\
889Content-Type: text/plain; charset="us-ascii"
890MIME-Version: 1.0
891Content-Transfer-Encoding: 7bit
892X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
893 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
894
895''')
896
897 def test_no_semis_header_splitter(self):
898 eq = self.ndiffAssertEqual
899 msg = Message()
900 msg['From'] = 'test@dom.ain'
901 msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
902 msg.set_payload('Test')
903 sfp = StringIO()
904 g = Generator(sfp)
905 g.flatten(msg)
906 eq(sfp.getvalue(), """\
907From: test@dom.ain
908References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
909 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>
910
911Test""")
912
R David Murray7da4db12011-04-07 20:37:17 -0400913 def test_last_split_chunk_does_not_fit(self):
914 eq = self.ndiffAssertEqual
915 h = Header('Subject: the first part of this is short, but_the_second'
916 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
917 '_all_by_itself')
918 eq(h.encode(), """\
919Subject: the first part of this is short,
920 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
921
922 def test_splittable_leading_char_followed_by_overlong_unsplitable(self):
923 eq = self.ndiffAssertEqual
924 h = Header(', but_the_second'
925 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
926 '_all_by_itself')
927 eq(h.encode(), """\
928,
929 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
930
931 def test_multiple_splittable_leading_char_followed_by_overlong_unsplitable(self):
932 eq = self.ndiffAssertEqual
933 h = Header(', , but_the_second'
934 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
935 '_all_by_itself')
936 eq(h.encode(), """\
937, ,
938 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
939
940 def test_trailing_splitable_on_overlong_unsplitable(self):
941 eq = self.ndiffAssertEqual
942 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
943 'be_on_a_line_all_by_itself;')
944 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_should_"
945 "be_on_a_line_all_by_itself;")
946
947 def test_trailing_splitable_on_overlong_unsplitable_with_leading_splitable(self):
948 eq = self.ndiffAssertEqual
949 h = Header('; '
950 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
R David Murray01581ee2011-04-18 10:04:34 -0400951 'be_on_a_line_all_by_itself; ')
R David Murray7da4db12011-04-07 20:37:17 -0400952 eq(h.encode(), """\
953;
R David Murray01581ee2011-04-18 10:04:34 -0400954 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
R David Murray7da4db12011-04-07 20:37:17 -0400955
R David Murraye1292a22011-04-07 20:54:03 -0400956 def test_long_header_with_multiple_sequential_split_chars(self):
R David Murraye1292a22011-04-07 20:54:03 -0400957 eq = self.ndiffAssertEqual
958 h = Header('This is a long line that has two whitespaces in a row. '
959 'This used to cause truncation of the header when folded')
960 eq(h.encode(), """\
961This is a long line that has two whitespaces in a row. This used to cause
962 truncation of the header when folded""")
963
Ezio Melotti1c4810b2013-08-10 18:57:12 +0300964 def test_splitter_split_on_punctuation_only_if_fws_with_header(self):
R David Murray01581ee2011-04-18 10:04:34 -0400965 eq = self.ndiffAssertEqual
966 h = Header('thisverylongheaderhas;semicolons;and,commas,but'
967 'they;arenotlegal;fold,points')
968 eq(h.encode(), "thisverylongheaderhas;semicolons;and,commas,butthey;"
969 "arenotlegal;fold,points")
970
971 def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self):
972 eq = self.ndiffAssertEqual
973 h = Header('this is a test where we need to have more than one line '
974 'before; our final line that is just too big to fit;; '
975 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
976 'be_on_a_line_all_by_itself;')
977 eq(h.encode(), """\
978this is a test where we need to have more than one line before;
979 our final line that is just too big to fit;;
980 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""")
981
982 def test_overlong_last_part_followed_by_split_point(self):
983 eq = self.ndiffAssertEqual
984 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
985 'be_on_a_line_all_by_itself ')
986 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_"
987 "should_be_on_a_line_all_by_itself ")
988
989 def test_multiline_with_overlong_parts_separated_by_two_split_points(self):
990 eq = self.ndiffAssertEqual
991 h = Header('this_is_a__test_where_we_need_to_have_more_than_one_line_'
992 'before_our_final_line_; ; '
993 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
994 'be_on_a_line_all_by_itself; ')
995 eq(h.encode(), """\
996this_is_a__test_where_we_need_to_have_more_than_one_line_before_our_final_line_;
997 ;
998 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
999
1000 def test_multiline_with_overlong_last_part_followed_by_split_point(self):
1001 eq = self.ndiffAssertEqual
1002 h = Header('this is a test where we need to have more than one line '
1003 'before our final line; ; '
1004 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
1005 'be_on_a_line_all_by_itself; ')
1006 eq(h.encode(), """\
1007this is a test where we need to have more than one line before our final line;
1008 ;
1009 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
1010
1011 def test_long_header_with_whitespace_runs(self):
1012 eq = self.ndiffAssertEqual
1013 msg = Message()
1014 msg['From'] = 'test@dom.ain'
1015 msg['References'] = SPACE.join(['<foo@dom.ain> '] * 10)
1016 msg.set_payload('Test')
1017 sfp = StringIO()
1018 g = Generator(sfp)
1019 g.flatten(msg)
1020 eq(sfp.getvalue(), """\
1021From: test@dom.ain
1022References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
1023 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
1024 <foo@dom.ain> <foo@dom.ain>\x20\x20
1025
1026Test""")
1027
1028 def test_long_run_with_semi_header_splitter(self):
1029 eq = self.ndiffAssertEqual
1030 msg = Message()
1031 msg['From'] = 'test@dom.ain'
1032 msg['References'] = SPACE.join(['<foo@dom.ain>'] * 10) + '; abc'
1033 msg.set_payload('Test')
1034 sfp = StringIO()
1035 g = Generator(sfp)
1036 g.flatten(msg)
1037 eq(sfp.getvalue(), """\
1038From: test@dom.ain
1039References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
1040 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
1041 <foo@dom.ain>; abc
1042
1043Test""")
1044
1045 def test_splitter_split_on_punctuation_only_if_fws(self):
1046 eq = self.ndiffAssertEqual
1047 msg = Message()
1048 msg['From'] = 'test@dom.ain'
1049 msg['References'] = ('thisverylongheaderhas;semicolons;and,commas,but'
1050 'they;arenotlegal;fold,points')
1051 msg.set_payload('Test')
1052 sfp = StringIO()
1053 g = Generator(sfp)
1054 g.flatten(msg)
1055 # XXX the space after the header should not be there.
1056 eq(sfp.getvalue(), """\
1057From: test@dom.ain
1058References:\x20
1059 thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points
1060
1061Test""")
1062
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001063 def test_no_split_long_header(self):
1064 eq = self.ndiffAssertEqual
1065 hstr = 'References: ' + 'x' * 80
Guido van Rossum9604e662007-08-30 03:46:43 +00001066 h = Header(hstr)
1067 # These come on two lines because Headers are really field value
1068 # classes and don't really know about their field names.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001069 eq(h.encode(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +00001070References:
1071 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
1072 h = Header('x' * 80)
1073 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001074
1075 def test_splitting_multiple_long_lines(self):
1076 eq = self.ndiffAssertEqual
1077 hstr = """\
1078from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1079\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1080\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1081"""
1082 h = Header(hstr, continuation_ws='\t')
1083 eq(h.encode(), """\
1084from babylon.socal-raves.org (localhost [127.0.0.1]);
1085 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1086 for <mailman-admin@babylon.socal-raves.org>;
1087 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1088\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1089 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1090 for <mailman-admin@babylon.socal-raves.org>;
1091 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1092\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1093 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1094 for <mailman-admin@babylon.socal-raves.org>;
1095 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")
1096
1097 def test_splitting_first_line_only_is_long(self):
1098 eq = self.ndiffAssertEqual
1099 hstr = """\
1100from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
1101\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1102\tid 17k4h5-00034i-00
1103\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
1104 h = Header(hstr, maxlinelen=78, header_name='Received',
1105 continuation_ws='\t')
1106 eq(h.encode(), """\
1107from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
1108 helo=cthulhu.gerg.ca)
1109\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1110\tid 17k4h5-00034i-00
1111\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")
1112
1113 def test_long_8bit_header(self):
1114 eq = self.ndiffAssertEqual
1115 msg = Message()
1116 h = Header('Britische Regierung gibt', 'iso-8859-1',
1117 header_name='Subject')
1118 h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
Guido van Rossum9604e662007-08-30 03:46:43 +00001119 eq(h.encode(maxlinelen=76), """\
1120=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1121 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001122 msg['Subject'] = h
Guido van Rossum9604e662007-08-30 03:46:43 +00001123 eq(msg.as_string(maxheaderlen=76), """\
1124Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1125 =?iso-8859-1?q?hore-Windkraftprojekte?=
1126
1127""")
1128 eq(msg.as_string(maxheaderlen=0), """\
1129Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001130
1131""")
1132
1133 def test_long_8bit_header_no_charset(self):
1134 eq = self.ndiffAssertEqual
1135 msg = Message()
Barry Warsaw8c571042007-08-30 19:17:18 +00001136 header_string = ('Britische Regierung gibt gr\xfcnes Licht '
1137 'f\xfcr Offshore-Windkraftprojekte '
1138 '<a-very-long-address@example.com>')
1139 msg['Reply-To'] = header_string
R David Murray7441a7a2012-03-14 02:59:51 -04001140 eq(msg.as_string(maxheaderlen=78), """\
1141Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1142 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
1143
1144""")
Barry Warsaw8c571042007-08-30 19:17:18 +00001145 msg = Message()
R David Murray7441a7a2012-03-14 02:59:51 -04001146 msg['Reply-To'] = Header(header_string,
Barry Warsaw8c571042007-08-30 19:17:18 +00001147 header_name='Reply-To')
1148 eq(msg.as_string(maxheaderlen=78), """\
1149Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1150 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001151
1152""")
1153
1154 def test_long_to_header(self):
1155 eq = self.ndiffAssertEqual
1156 to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
R David Murray01581ee2011-04-18 10:04:34 -04001157 '<someone@eecs.umich.edu>, '
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001158 '"Someone Test #B" <someone@umich.edu>, '
1159 '"Someone Test #C" <someone@eecs.umich.edu>, '
1160 '"Someone Test #D" <someone@eecs.umich.edu>')
1161 msg = Message()
1162 msg['To'] = to
1163 eq(msg.as_string(maxheaderlen=78), '''\
Guido van Rossum9604e662007-08-30 03:46:43 +00001164To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
Barry Warsaw70d61ce2009-03-30 23:12:30 +00001165 "Someone Test #B" <someone@umich.edu>,
Guido van Rossum9604e662007-08-30 03:46:43 +00001166 "Someone Test #C" <someone@eecs.umich.edu>,
1167 "Someone Test #D" <someone@eecs.umich.edu>
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001168
1169''')
1170
1171 def test_long_line_after_append(self):
1172 eq = self.ndiffAssertEqual
1173 s = 'This is an example of string which has almost the limit of header length.'
1174 h = Header(s)
1175 h.append('Add another line.')
Guido van Rossum9604e662007-08-30 03:46:43 +00001176 eq(h.encode(maxlinelen=76), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001177This is an example of string which has almost the limit of header length.
1178 Add another line.""")
1179
1180 def test_shorter_line_with_append(self):
1181 eq = self.ndiffAssertEqual
1182 s = 'This is a shorter line.'
1183 h = Header(s)
1184 h.append('Add another sentence. (Surprise?)')
1185 eq(h.encode(),
1186 'This is a shorter line. Add another sentence. (Surprise?)')
1187
1188 def test_long_field_name(self):
1189 eq = self.ndiffAssertEqual
1190 fn = 'X-Very-Very-Very-Long-Header-Name'
Guido van Rossum9604e662007-08-30 03:46:43 +00001191 gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
1192 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
1193 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
1194 'bef\xf6rdert. ')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001195 h = Header(gs, 'iso-8859-1', header_name=fn)
1196 # BAW: this seems broken because the first line is too long
Guido van Rossum9604e662007-08-30 03:46:43 +00001197 eq(h.encode(maxlinelen=76), """\
1198=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
1199 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
1200 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
1201 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001202
1203 def test_long_received_header(self):
1204 h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
1205 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
1206 'Wed, 05 Mar 2003 18:10:18 -0700')
1207 msg = Message()
1208 msg['Received-1'] = Header(h, continuation_ws='\t')
1209 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +00001210 # This should be splitting on spaces not semicolons.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001211 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001212Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1213 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
Barry Warsawbef9d212007-08-31 10:55:37 +00001214 Wed, 05 Mar 2003 18:10:18 -0700
R David Murray01581ee2011-04-18 10:04:34 -04001215Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1216 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
Barry Warsawbef9d212007-08-31 10:55:37 +00001217 Wed, 05 Mar 2003 18:10:18 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001218
1219""")
1220
1221 def test_string_headerinst_eq(self):
1222 h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
1223 'tu-muenchen.de> (David Bremner\'s message of '
1224 '"Thu, 6 Mar 2003 13:58:21 +0100")')
1225 msg = Message()
1226 msg['Received-1'] = Header(h, header_name='Received-1',
1227 continuation_ws='\t')
1228 msg['Received-2'] = h
R David Murray01581ee2011-04-18 10:04:34 -04001229 # XXX The space after the ':' should not be there.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001230 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001231Received-1:\x20
1232 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1233 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
1234Received-2:\x20
1235 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1236 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001237
1238""")
1239
1240 def test_long_unbreakable_lines_with_continuation(self):
1241 eq = self.ndiffAssertEqual
1242 msg = Message()
1243 t = """\
1244iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1245 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
1246 msg['Face-1'] = t
1247 msg['Face-2'] = Header(t, header_name='Face-2')
R David Murray01581ee2011-04-18 10:04:34 -04001248 msg['Face-3'] = ' ' + t
Barry Warsawbef9d212007-08-31 10:55:37 +00001249 # XXX This splitting is all wrong. It the first value line should be
R David Murray01581ee2011-04-18 10:04:34 -04001250 # snug against the field name or the space after the header not there.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001251 eq(msg.as_string(maxheaderlen=78), """\
Barry Warsawc5a6a302007-08-31 11:19:21 +00001252Face-1:\x20
Barry Warsaw70d61ce2009-03-30 23:12:30 +00001253 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001254 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Barry Warsawc5a6a302007-08-31 11:19:21 +00001255Face-2:\x20
Barry Warsawbef9d212007-08-31 10:55:37 +00001256 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001257 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
R David Murray01581ee2011-04-18 10:04:34 -04001258Face-3:\x20
1259 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1260 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001261
1262""")
1263
1264 def test_another_long_multiline_header(self):
1265 eq = self.ndiffAssertEqual
1266 m = ('Received: from siimage.com '
1267 '([172.25.1.3]) by zima.siliconimage.com with '
Guido van Rossum9604e662007-08-30 03:46:43 +00001268 'Microsoft SMTPSVC(5.0.2195.4905); '
1269 'Wed, 16 Oct 2002 07:41:11 -0700')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001270 msg = email.message_from_string(m)
1271 eq(msg.as_string(maxheaderlen=78), '''\
R David Murray01581ee2011-04-18 10:04:34 -04001272Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with
1273 Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 2002 07:41:11 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001274
1275''')
1276
1277 def test_long_lines_with_different_header(self):
1278 eq = self.ndiffAssertEqual
1279 h = ('List-Unsubscribe: '
1280 '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
1281 ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
1282 '?subject=unsubscribe>')
1283 msg = Message()
1284 msg['List'] = h
1285 msg['List'] = Header(h, header_name='List')
1286 eq(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001287List: List-Unsubscribe:
1288 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +00001289 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
R David Murray01581ee2011-04-18 10:04:34 -04001290List: List-Unsubscribe:
1291 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +00001292 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001293
1294""")
1295
R. David Murray6f0022d2011-01-07 21:57:25 +00001296 def test_long_rfc2047_header_with_embedded_fws(self):
1297 h = Header(textwrap.dedent("""\
1298 We're going to pretend this header is in a non-ascii character set
1299 \tto see if line wrapping with encoded words and embedded
1300 folding white space works"""),
1301 charset='utf-8',
1302 header_name='Test')
1303 self.assertEqual(h.encode()+'\n', textwrap.dedent("""\
1304 =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
1305 =?utf-8?q?cter_set?=
1306 =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
1307 =?utf-8?q?_folding_white_space_works?=""")+'\n')
1308
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001309
Ezio Melottib3aedd42010-11-20 19:04:17 +00001310
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001311# Test mangling of "From " lines in the body of a message
1312class TestFromMangling(unittest.TestCase):
1313 def setUp(self):
1314 self.msg = Message()
1315 self.msg['From'] = 'aaa@bbb.org'
1316 self.msg.set_payload("""\
1317From the desk of A.A.A.:
1318Blah blah blah
1319""")
1320
1321 def test_mangled_from(self):
1322 s = StringIO()
1323 g = Generator(s, mangle_from_=True)
1324 g.flatten(self.msg)
1325 self.assertEqual(s.getvalue(), """\
1326From: aaa@bbb.org
1327
1328>From the desk of A.A.A.:
1329Blah blah blah
1330""")
1331
1332 def test_dont_mangle_from(self):
1333 s = StringIO()
1334 g = Generator(s, mangle_from_=False)
1335 g.flatten(self.msg)
1336 self.assertEqual(s.getvalue(), """\
1337From: aaa@bbb.org
1338
1339From the desk of A.A.A.:
1340Blah blah blah
1341""")
1342
R David Murray6a31bc62012-07-22 21:47:53 -04001343 def test_mangle_from_in_preamble_and_epilog(self):
1344 s = StringIO()
1345 g = Generator(s, mangle_from_=True)
1346 msg = email.message_from_string(textwrap.dedent("""\
1347 From: foo@bar.com
1348 Mime-Version: 1.0
1349 Content-Type: multipart/mixed; boundary=XXX
1350
1351 From somewhere unknown
1352
1353 --XXX
1354 Content-Type: text/plain
1355
1356 foo
1357
1358 --XXX--
1359
1360 From somewhere unknowable
1361 """))
1362 g.flatten(msg)
1363 self.assertEqual(len([1 for x in s.getvalue().split('\n')
1364 if x.startswith('>From ')]), 2)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001365
R David Murray638d40b2012-08-24 11:14:13 -04001366 def test_mangled_from_with_bad_bytes(self):
1367 source = textwrap.dedent("""\
1368 Content-Type: text/plain; charset="utf-8"
1369 MIME-Version: 1.0
1370 Content-Transfer-Encoding: 8bit
1371 From: aaa@bbb.org
1372
1373 """).encode('utf-8')
1374 msg = email.message_from_bytes(source + b'From R\xc3\xb6lli\n')
1375 b = BytesIO()
1376 g = BytesGenerator(b, mangle_from_=True)
1377 g.flatten(msg)
1378 self.assertEqual(b.getvalue(), source + b'>From R\xc3\xb6lli\n')
1379
Ezio Melottib3aedd42010-11-20 19:04:17 +00001380
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001381# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    # Every test works on a MIMEAudio part built in setUp() from the bytes
    # of the 'audiotest.au' sample file.

    def setUp(self):
        with openfile('audiotest.au', 'rb') as audio_file:
            self._audiodata = audio_file.read()
        self._au = MIMEAudio(self._audiodata)

    def test_guess_minor_type(self):
        content_type = self._au.get_content_type()
        self.assertEqual(content_type, 'audio/basic')

    def test_encoding(self):
        # The payload is text; decoding it as base64 must give back the
        # original file bytes.
        encoded = bytes(self._au.get_payload(), 'ascii')
        self.assertEqual(base64.decodebytes(encoded), self._audiodata)

    def test_checkSetMinor(self):
        part = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(part.get_content_type(), 'audio/fish')

    def test_add_header(self):
        part = self._au
        part.add_header('Content-Disposition', 'attachment',
                        filename='audiotest.au')
        self.assertEqual(part['content-disposition'],
                         'attachment; filename="audiotest.au"')
        self.assertEqual(part.get_params(header='content-disposition'),
                         [('attachment', ''), ('filename', 'audiotest.au')])
        self.assertEqual(
            part.get_param('filename', header='content-disposition'),
            'audiotest.au')
        # A unique object, so identity checks prove the failobj came back.
        sentinel = []
        self.assertEqual(
            part.get_param('attachment', header='content-disposition'), '')
        self.assertIs(
            part.get_param('foo', failobj=sentinel,
                           header='content-disposition'),
            sentinel)
        # Try some missing stuff
        self.assertIs(part.get_param('foobar', sentinel), sentinel)
        self.assertIs(
            part.get_param('attachment', sentinel, header='foobar'),
            sentinel)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001418
1419
Ezio Melottib3aedd42010-11-20 19:04:17 +00001420
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001421# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    # Exercises MIMEImage built from the bytes of the 'PyBanner048.gif'
    # fixture: the reported content type, the encoded payload, an explicit
    # subtype argument, and parameter lookup on an added Content-Disposition
    # header.

    def setUp(self):
        # Read the fixture in binary mode and wrap the bytes in a MIMEImage.
        with openfile('PyBanner048.gif', 'rb') as image_file:
            raw = image_file.read()
        self._imgdata = raw
        self._im = MIMEImage(raw)

    def test_guess_minor_type(self):
        # setUp passes no subtype, yet the part must report image/gif.
        self.assertEqual(self._im.get_content_type(), 'image/gif')

    def test_encoding(self):
        # The payload is a str; as ASCII bytes it must base64-decode back to
        # exactly the data the part was built from.
        encoded = bytes(self._im.get_payload(), 'ascii')
        self.assertEqual(base64.decodebytes(encoded), self._imgdata)

    def test_checkSetMinor(self):
        # An explicit second argument becomes the subtype.
        part = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(part.get_content_type(), 'image/fish')

    def test_add_header(self):
        part = self._im
        part.add_header('Content-Disposition', 'attachment',
                        filename='dingusfish.gif')
        self.assertEqual(part['content-disposition'],
                         'attachment; filename="dingusfish.gif"')
        self.assertEqual(part.get_params(header='content-disposition'),
                         [('attachment', ''), ('filename', 'dingusfish.gif')])
        self.assertEqual(
            part.get_param('filename', header='content-disposition'),
            'dingusfish.gif')
        self.assertEqual(
            part.get_param('attachment', header='content-disposition'), '')
        # A fresh list is used as the fallback so that assertIs can prove
        # the very same object was handed back.
        sentinel = []
        self.assertIs(
            part.get_param('foo', failobj=sentinel,
                           header='content-disposition'),
            sentinel)
        # Try some missing stuff
        self.assertIs(part.get_param('foobar', sentinel), sentinel)
        self.assertIs(
            part.get_param('attachment', sentinel, header='foobar'),
            sentinel)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001458
1459
Ezio Melottib3aedd42010-11-20 19:04:17 +00001460
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001461# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    # Covers MIMEApplication with its default encoder and with each encoder
    # from email.encoders passed explicitly.  The binary-body tests also
    # flatten the message with BytesGenerator, parse the bytes back, and
    # check both the original and the reparsed message.

    def _roundtrip(self, msg):
        # Flatten msg to bytes and return the message parsed back from them.
        wire = BytesIO()
        BytesGenerator(wire).flatten(msg)
        return email.message_from_bytes(wire.getvalue())

    def test_headers(self):
        msg = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        self.assertEqual(msg.get_content_type(), 'application/octet-stream')
        self.assertEqual(msg['content-transfer-encoding'], 'base64')

    def test_body(self):
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata)
        # whitespace in the cte encoded block is RFC-irrelevant.
        self.assertEqual(msg.get_payload().strip(), '+vv8/f7/')
        self.assertEqual(msg.get_payload(decode=True), bytesdata)

    def test_binary_body_with_encode_7or8bit(self):
        # Issue 17171.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_7or8bit)
        # Treated as a string, this will be invalid code points.
        undecoded = '\uFFFD' * len(bytesdata)
        self.assertEqual(msg.get_payload(), undecoded)
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        self.assertEqual(msg['Content-Transfer-Encoding'], '8bit')
        msg2 = self._roundtrip(msg)
        # Re-check the original after flattening, then the reparsed copy.
        self.assertEqual(msg.get_payload(), undecoded)
        self.assertEqual(msg2.get_payload(), undecoded)
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
        self.assertEqual(msg2['Content-Transfer-Encoding'], '8bit')

    def test_binary_body_with_encode_noop(self):
        # Issue 16564: This does not produce an RFC valid message, since to be
        # valid it should have a CTE of binary.  But the below works in
        # Python2, and is documented as working this way.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_noop)
        # Treated as a string, this will be invalid code points.
        undecoded = '\uFFFD' * len(bytesdata)
        self.assertEqual(msg.get_payload(), undecoded)
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        msg2 = self._roundtrip(msg)
        # Re-check the original after flattening, then the reparsed copy.
        self.assertEqual(msg.get_payload(), undecoded)
        self.assertEqual(msg2.get_payload(), undecoded)
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)

    def test_binary_body_with_encode_quopri(self):
        # Issue 14360.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff '
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_quopri)
        encoded = '=FA=FB=FC=FD=FE=FF=20'
        self.assertEqual(msg.get_payload(), encoded)
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        self.assertEqual(msg['Content-Transfer-Encoding'], 'quoted-printable')
        msg2 = self._roundtrip(msg)
        # Re-check the original after flattening, then the reparsed copy.
        self.assertEqual(msg.get_payload(), encoded)
        self.assertEqual(msg2.get_payload(), encoded)
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
        self.assertEqual(msg2['Content-Transfer-Encoding'],
                         'quoted-printable')

    def test_binary_body_with_encode_base64(self):
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_base64)
        encoded = '+vv8/f7/\n'
        self.assertEqual(msg.get_payload(), encoded)
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        msg2 = self._roundtrip(msg)
        # Re-check the original after flattening, then the reparsed copy.
        self.assertEqual(msg.get_payload(), encoded)
        self.assertEqual(msg2.get_payload(), encoded)
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
1539
Ezio Melottib3aedd42010-11-20 19:04:17 +00001540
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001541# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    # Checks MIMEText content type, charset parameter handling and payload
    # for ASCII and UTF-8 input.

    def setUp(self):
        # A plain message created without an explicit charset.
        self._msg = MIMEText('hello there')

    def test_types(self):
        msg = self._msg
        self.assertEqual(msg.get_content_type(), 'text/plain')
        self.assertEqual(msg.get_param('charset'), 'us-ascii')
        # A fresh list is used as the fallback so that assertIs can prove
        # the very same object was handed back.
        sentinel = []
        self.assertIs(msg.get_param('foobar', sentinel), sentinel)
        self.assertIs(
            msg.get_param('charset', sentinel, header='foobar'), sentinel)

    def test_payload(self):
        msg = self._msg
        self.assertEqual(msg.get_payload(), 'hello there')
        self.assertFalse(msg.is_multipart())

    def test_charset(self):
        msg = MIMEText('hello there', _charset='us-ascii')
        self.assertEqual(msg.get_charset().input_charset, 'us-ascii')
        self.assertEqual(msg['content-type'],
                         'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        # Performs the same checks as test_charset.
        msg = MIMEText('hello there', _charset='us-ascii')
        self.assertEqual(msg.get_charset().input_charset, 'us-ascii')
        self.assertEqual(msg['content-type'],
                         'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        msg = MIMEText('hello there')
        # The charset object is compared directly against the plain string.
        self.assertEqual(msg.get_charset(), 'us-ascii')
        self.assertEqual(msg['content-type'],
                         'text/plain; charset="us-ascii"')
        self.assertIn('hello there', msg.as_string())

    def test_utf8_input(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        msg = MIMEText(teststr, _charset='utf-8')
        self.assertEqual(msg.get_charset().output_charset, 'utf-8')
        self.assertEqual(msg['content-type'], 'text/plain; charset="utf-8"')
        # Decoding the payload must give back the UTF-8 bytes of the input.
        self.assertEqual(msg.get_payload(decode=True),
                         teststr.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
        "will fix in email6")
    def test_utf8_input_no_charset(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
1591
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001592
Ezio Melottib3aedd42010-11-20 19:04:17 +00001593
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001594# Test complicated multipart/* messages
class TestMultipart(TestEmailBase):
    # Tests for multipart/* messages: a container built by hand in setUp,
    # the exact text generated for containers with various preamble and
    # epilogue settings, and parsing of several fixture messages with
    # unusual boundary usage.

    def setUp(self):
        # Build a multipart/mixed container holding a text part followed by
        # an image part, and keep references to all three for the tests.
        with openfile('PyBanner048.gif', 'rb') as fp:
            data = fp.read()
        container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
        image = MIMEImage(data, name='dingusfish.gif')
        image.add_header('content-disposition', 'attachment',
                         filename='dingusfish.gif')
        intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
        container.attach(intro)
        container.attach(image)
        container['From'] = 'Barry <barry@digicool.com>'
        container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
        container['Subject'] = 'Here is your dingus fish'

        now = 987809702.54848599
        timetuple = time.localtime(now)
        # The last field of the time tuple is the DST flag; it selects
        # between the standard and the alternate zone offset.
        if timetuple[-1] == 0:
            tzsecs = time.timezone
        else:
            tzsecs = time.altzone
        # time.timezone/altzone count seconds *west* of UTC, so a positive
        # value is written with a '-' sign.
        if tzsecs > 0:
            sign = '-'
        else:
            sign = '+'
        # Format the magnitude as HHMM.  Work on abs() so that a negative
        # tzsecs cannot leak a second sign into the output, and split hours
        # from minutes so that offsets such as 5h30 are rendered correctly.
        hours, minutes = divmod(abs(tzsecs) // 60, 60)
        tzoffset = ' %s%02d%02d' % (sign, hours, minutes)
        container['Date'] = time.strftime(
            '%a, %d %b %Y %H:%M:%S', timetuple) + tzoffset
        self._msg = container
        self._im = image
        self._txt = intro

    def _new_outer(self):
        # Return a multipart/mixed container carrying the Subject, To and
        # From headers used by the generated-output tests.  No boundary is
        # set here; each test calls set_boundary() itself.
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        return outer

    def test_hierarchy(self):
        m = self._msg
        self.assertTrue(m.is_multipart())
        self.assertEqual(m.get_content_type(), 'multipart/mixed')
        self.assertEqual(len(m.get_payload()), 2)
        # Only indexes 0 and 1 exist.
        self.assertRaises(IndexError, m.get_payload, 2)
        m0 = m.get_payload(0)
        m1 = m.get_payload(1)
        # The subparts are the very objects attached in setUp, in order.
        self.assertIs(m0, self._txt)
        self.assertIs(m1, self._im)
        self.assertEqual(m.get_payload(), [m0, m1])
        self.assertFalse(m0.is_multipart())
        self.assertFalse(m1.is_multipart())

    def test_empty_multipart_idempotent(self):
        # Parsing this text and generating it again must reproduce it.
        text = """\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY


--BOUNDARY--
"""
        msg = Parser().parsestr(text)
        self.ndiffAssertEqual(text, msg.as_string())

    def test_no_parts_in_a_multipart_with_none_epilogue(self):
        outer = self._new_outer()
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY

--BOUNDARY--''')

    def test_no_parts_in_a_multipart_with_empty_epilogue(self):
        outer = self._new_outer()
        outer.preamble = ''
        outer.epilogue = ''
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY

--BOUNDARY--
''')

    def test_one_part_in_a_multipart(self):
        outer = self._new_outer()
        outer.set_boundary('BOUNDARY')
        outer.attach(MIMEText('hello world'))
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_preamble(self):
        outer = self._new_outer()
        outer.preamble = ''
        outer.attach(MIMEText('hello world'))
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_preamble(self):
        outer = self._new_outer()
        outer.preamble = None
        outer.attach(MIMEText('hello world'))
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_epilogue(self):
        outer = self._new_outer()
        outer.epilogue = None
        outer.attach(MIMEText('hello world'))
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
        outer = self._new_outer()
        outer.epilogue = ''
        outer.attach(MIMEText('hello world'))
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
        outer = self._new_outer()
        outer.epilogue = '\n'
        outer.attach(MIMEText('hello world'))
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--

''')

    def test_message_external_body(self):
        msg = self._msgobj('msg_36.txt')
        self.assertEqual(len(msg.get_payload()), 2)
        msg1 = msg.get_payload(1)
        self.assertEqual(msg1.get_content_type(), 'multipart/alternative')
        self.assertEqual(len(msg1.get_payload()), 2)
        # Each alternative wraps exactly one text/plain message.
        for subpart in msg1.get_payload():
            self.assertEqual(subpart.get_content_type(),
                             'message/external-body')
            self.assertEqual(len(subpart.get_payload()), 1)
            subsubpart = subpart.get_payload(0)
            self.assertEqual(subsubpart.get_content_type(), 'text/plain')

    def test_double_boundary(self):
        # msg_37.txt is a multipart that contains two dash-boundary's in a
        # row.  Our interpretation of RFC 2046 calls for ignoring the second
        # and subsequent boundaries.
        msg = self._msgobj('msg_37.txt')
        self.assertEqual(len(msg.get_payload()), 3)

    def test_nested_inner_contains_outer_boundary(self):
        # msg_38.txt has an inner part that contains outer boundaries.  My
        # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
        # these are illegal and should be interpreted as unterminated inner
        # parts.
        msg = self._msgobj('msg_38.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): the expected text is indented 4 spaces per nesting
        # level -- confirm the levels against the msg_38.txt fixture.
        self.ndiffAssertEqual(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
            text/plain
        text/plain
    text/plain
    text/plain
""")

    def test_nested_with_same_boundary(self):
        # msg 39.txt is similarly evil in that it's got inner parts that use
        # the same boundary as outer parts.  Again, I believe the way this is
        # parsed is closest to the spirit of RFC 2046
        msg = self._msgobj('msg_39.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): the expected text is indented 4 spaces per nesting
        # level -- confirm the levels against the msg_39.txt fixture.
        self.ndiffAssertEqual(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
        application/octet-stream
        application/octet-stream
    text/plain
""")

    def test_boundary_in_non_multipart(self):
        # A boundary parameter on a text/html message: the body, boundary
        # lines included, must be regenerated as-is.
        msg = self._msgobj('msg_40.txt')
        self.assertEqual(msg.as_string(), '''\
MIME-Version: 1.0
Content-Type: text/html; boundary="--961284236552522269"

----961284236552522269
Content-Type: text/html;
Content-Transfer-Encoding: 7Bit

<html></html>

----961284236552522269--
''')

    def test_boundary_with_leading_space(self):
        # The whitespace at the start of the quoted boundary value is part
        # of the boundary.
        msg = email.message_from_string('''\
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary=" XXXX"

-- XXXX
Content-Type: text/plain


-- XXXX
Content-Type: text/plain

-- XXXX--
''')
        self.assertTrue(msg.is_multipart())
        self.assertEqual(msg.get_boundary(), ' XXXX')
        self.assertEqual(len(msg.get_payload()), 2)

    def test_boundary_without_trailing_newline(self):
        # The closing boundary is the last thing in the input, with no
        # newline after it; the part body must still come out unchanged.
        m = Parser().parsestr("""\
Content-Type: multipart/mixed; boundary="===============0012394164=="
MIME-Version: 1.0

--===============0012394164==
Content-Type: image/file1.jpg
MIME-Version: 1.0
Content-Transfer-Encoding: base64

YXNkZg==
--===============0012394164==--""")
        self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001965
1966
Ezio Melottib3aedd42010-11-20 19:04:17 +00001967
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001968# Test some badly formatted messages
class TestNonConformant(TestEmailBase):
    # Parsing of badly formatted messages: what the parser salvages and
    # which defects it records on the resulting message objects.

    def test_parse_missing_minor_type(self):
        msg = self._msgobj('msg_14.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')
        self.assertEqual(msg.get_content_maintype(), 'text')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    # test_defect_handling
    def test_same_boundary_inner_outer(self):
        msg = self._msgobj('msg_15.txt')
        # XXX We can probably eventually do better
        inner = msg.get_payload(0)
        self.assertTrue(hasattr(inner, 'defects'))
        self.assertEqual(len(inner.defects), 1)
        self.assertIsInstance(inner.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    # test_defect_handling
    def test_multipart_no_boundary(self):
        msg = self._msgobj('msg_25.txt')
        # The payload is a plain string rather than a list of parts.
        self.assertIsInstance(msg.get_payload(), str)
        defects = msg.defects
        self.assertEqual(len(defects), 2)
        self.assertIsInstance(defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(defects[1],
                              errors.MultipartInvariantViolationDefect)

    # Template for the three CTE tests below; the '{}' placeholder sits at
    # the end of the Content-Type header, so text formatted into it that
    # starts with a newline becomes an additional header line.
    multipart_msg = textwrap.dedent("""\
        Date: Wed, 14 Nov 2007 12:56:23 GMT
        From: foo@bar.invalid
        To: foo@bar.invalid
        Subject: Content-Transfer-Encoding: base64 and multipart
        MIME-Version: 1.0
        Content-Type: multipart/mixed;
            boundary="===============3344438784458119861=="{}

        --===============3344438784458119861==
        Content-Type: text/plain

        Test message

        --===============3344438784458119861==
        Content-Type: application/octet-stream
        Content-Transfer-Encoding: base64

        YWJj

        --===============3344438784458119861==--
        """)

    # test_defect_handling
    def test_multipart_invalid_cte(self):
        source = self.multipart_msg.format(
            "\nContent-Transfer-Encoding: base64")
        msg = self._str_msg(source)
        self.assertEqual(len(msg.defects), 1)
        self.assertIsInstance(
            msg.defects[0],
            errors.InvalidMultipartContentTransferEncodingDefect)

    # test_defect_handling
    def test_multipart_no_cte_no_defect(self):
        source = self.multipart_msg.format('')
        msg = self._str_msg(source)
        self.assertEqual(len(msg.defects), 0)

    # test_defect_handling
    def test_multipart_valid_cte_no_defect(self):
        for cte in ('7bit', '8bit', 'BINary'):
            extra_header = "\nContent-Transfer-Encoding: {}".format(cte)
            msg = self._str_msg(self.multipart_msg.format(extra_header))
            self.assertEqual(len(msg.defects), 0)

    # test_headerregistry.TestContentTypeHeader invalid_1 and invalid_2.
    def test_invalid_content_type(self):
        msg = Message()
        # RFC 2045, $5.2 says invalid yields text/plain
        msg['Content-Type'] = 'text'
        self.assertEqual(msg.get_content_maintype(), 'text')
        self.assertEqual(msg.get_content_subtype(), 'plain')
        self.assertEqual(msg.get_content_type(), 'text/plain')
        # Clear the old value and try something /really/ invalid
        del msg['content-type']
        msg['Content-Type'] = 'foo'
        self.assertEqual(msg.get_content_maintype(), 'text')
        self.assertEqual(msg.get_content_subtype(), 'plain')
        self.assertEqual(msg.get_content_type(), 'text/plain')
        # Still, make sure that the message is idempotently generated
        out = StringIO()
        Generator(out).flatten(msg)
        self.ndiffAssertEqual(out.getvalue(), 'Content-Type: foo\n\n')

    def test_no_start_boundary(self):
        # The whole body, boundary lines included, is the payload.
        msg = self._msgobj('msg_31.txt')
        self.ndiffAssertEqual(msg.get_payload(), """\
--BOUNDARY
Content-Type: text/plain

message 1

--BOUNDARY
Content-Type: text/plain

message 2

--BOUNDARY--
""")

    def test_no_separating_blank_line(self):
        # The generated text has the blank line between headers and body.
        msg = self._msgobj('msg_35.txt')
        self.ndiffAssertEqual(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: here's something interesting

counter to RFC 2822, there's no separating newline here
""")

    # test_defect_handling
    def test_lying_multipart(self):
        msg = self._msgobj('msg_41.txt')
        self.assertTrue(hasattr(msg, 'defects'))
        defects = msg.defects
        self.assertEqual(len(defects), 2)
        self.assertIsInstance(defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(defects[1],
                              errors.MultipartInvariantViolationDefect)

    # test_defect_handling
    def test_missing_start_boundary(self):
        outer = self._msgobj('msg_42.txt')
        # The message structure is:
        #
        # multipart/mixed
        #    text/plain
        #    message/rfc822
        #        multipart/mixed [*]
        #
        # [*] This message is missing its start boundary
        bad = outer.get_payload(1).get_payload(0)
        self.assertEqual(len(bad.defects), 1)
        self.assertIsInstance(bad.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    # test_defect_handling
    def test_first_line_is_continuation_header(self):
        source = ' Line 1\nSubject: test\n\nbody'
        msg = email.message_from_string(source)
        # The continuation line is dropped from the headers; it is only
        # recorded on the defect.
        self.assertEqual(msg.keys(), ['Subject'])
        self.assertEqual(msg.get_payload(), 'body')
        self.assertEqual(len(msg.defects), 1)
        self.assertDefectsEqual(msg.defects,
                                [errors.FirstHeaderLineIsContinuationDefect])
        self.assertEqual(msg.defects[0].line, ' Line 1\n')

    # test_defect_handling
    def test_missing_header_body_separator(self):
        # Our heuristic if we see a line that doesn't look like a header (no
        # leading whitespace but no ':') is to assume that the blank line that
        # separates the header from the body is missing, and to stop parsing
        # headers and start parsing the body.
        msg = self._str_msg('Subject: test\nnot a header\nTo: abc\n\nb\n')
        self.assertEqual(msg.keys(), ['Subject'])
        self.assertEqual(msg.get_payload(), 'not a header\nTo: abc\n\nb\n')
        self.assertDefectsEqual(msg.defects,
                                [errors.MissingHeaderBodySeparatorDefect])
2141
Ezio Melottib3aedd42010-11-20 19:04:17 +00002142
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002143# Test RFC 2047 header encoding and decoding
Guido van Rossum9604e662007-08-30 03:46:43 +00002144class TestRFC2047(TestEmailBase):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002145 def test_rfc2047_multiline(self):
2146 eq = self.assertEqual
2147 s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
2148 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
2149 dh = decode_header(s)
2150 eq(dh, [
R David Murray07ea53c2012-06-02 17:56:49 -04002151 (b'Re: ', None),
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002152 (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
R David Murray07ea53c2012-06-02 17:56:49 -04002153 (b' baz foo bar ', None),
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002154 (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
2155 header = make_header(dh)
2156 eq(str(header),
2157 'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
Barry Warsaw00b34222007-08-31 02:35:00 +00002158 self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
Guido van Rossum9604e662007-08-30 03:46:43 +00002159Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
2160 =?mac-iceland?q?=9Arg=8Cs?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002161
R David Murray07ea53c2012-06-02 17:56:49 -04002162 def test_whitespace_keeper_unicode(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002163 eq = self.assertEqual
2164 s = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
2165 dh = decode_header(s)
2166 eq(dh, [(b'Andr\xe9', 'iso-8859-1'),
R David Murray07ea53c2012-06-02 17:56:49 -04002167 (b' Pirard <pirard@dom.ain>', None)])
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002168 header = str(make_header(dh))
2169 eq(header, 'Andr\xe9 Pirard <pirard@dom.ain>')
2170
R David Murray07ea53c2012-06-02 17:56:49 -04002171 def test_whitespace_keeper_unicode_2(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002172 eq = self.assertEqual
2173 s = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?='
2174 dh = decode_header(s)
R David Murray07ea53c2012-06-02 17:56:49 -04002175 eq(dh, [(b'The ', None), (b'quick brown fox', 'iso-8859-1'),
2176 (b' jumped over the ', None), (b'lazy dog', 'iso-8859-1')])
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002177 hu = str(make_header(dh))
2178 eq(hu, 'The quick brown fox jumped over the lazy dog')
2179
2180 def test_rfc2047_missing_whitespace(self):
2181 s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
2182 dh = decode_header(s)
R David Murray07ea53c2012-06-02 17:56:49 -04002183 self.assertEqual(dh, [(b'Sm', None), (b'\xf6', 'iso-8859-1'),
2184 (b'rg', None), (b'\xe5', 'iso-8859-1'),
2185 (b'sbord', None)])
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002186
2187 def test_rfc2047_with_whitespace(self):
2188 s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
2189 dh = decode_header(s)
R David Murray07ea53c2012-06-02 17:56:49 -04002190 self.assertEqual(dh, [(b'Sm ', None), (b'\xf6', 'iso-8859-1'),
2191 (b' rg ', None), (b'\xe5', 'iso-8859-1'),
2192 (b' sbord', None)])
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002193
R. David Murrayc4e69cc2010-08-03 22:14:10 +00002194 def test_rfc2047_B_bad_padding(self):
2195 s = '=?iso-8859-1?B?%s?='
2196 data = [ # only test complete bytes
2197 ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
2198 ('dmk=', b'vi'), ('dmk', b'vi')
2199 ]
2200 for q, a in data:
2201 dh = decode_header(s % q)
2202 self.assertEqual(dh, [(a, 'iso-8859-1')])
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002203
R. David Murray31e984c2010-10-01 15:40:20 +00002204 def test_rfc2047_Q_invalid_digits(self):
2205 # issue 10004.
2206 s = '=?iso-8659-1?Q?andr=e9=zz?='
2207 self.assertEqual(decode_header(s),
2208 [(b'andr\xe9=zz', 'iso-8659-1')])
2209
R David Murray07ea53c2012-06-02 17:56:49 -04002210 def test_rfc2047_rfc2047_1(self):
2211 # 1st testcase at end of rfc2047
2212 s = '(=?ISO-8859-1?Q?a?=)'
2213 self.assertEqual(decode_header(s),
2214 [(b'(', None), (b'a', 'iso-8859-1'), (b')', None)])
2215
2216 def test_rfc2047_rfc2047_2(self):
2217 # 2nd testcase at end of rfc2047
2218 s = '(=?ISO-8859-1?Q?a?= b)'
2219 self.assertEqual(decode_header(s),
2220 [(b'(', None), (b'a', 'iso-8859-1'), (b' b)', None)])
2221
2222 def test_rfc2047_rfc2047_3(self):
2223 # 3rd testcase at end of rfc2047
2224 s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)'
2225 self.assertEqual(decode_header(s),
2226 [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])
2227
2228 def test_rfc2047_rfc2047_4(self):
2229 # 4th testcase at end of rfc2047
2230 s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)'
2231 self.assertEqual(decode_header(s),
2232 [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])
2233
2234 def test_rfc2047_rfc2047_5a(self):
2235 # 5th testcase at end of rfc2047 newline is \r\n
2236 s = '(=?ISO-8859-1?Q?a?=\r\n =?ISO-8859-1?Q?b?=)'
2237 self.assertEqual(decode_header(s),
2238 [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])
2239
2240 def test_rfc2047_rfc2047_5b(self):
2241 # 5th testcase at end of rfc2047 newline is \n
2242 s = '(=?ISO-8859-1?Q?a?=\n =?ISO-8859-1?Q?b?=)'
2243 self.assertEqual(decode_header(s),
2244 [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])
2245
2246 def test_rfc2047_rfc2047_6(self):
2247 # 6th testcase at end of rfc2047
2248 s = '(=?ISO-8859-1?Q?a_b?=)'
2249 self.assertEqual(decode_header(s),
2250 [(b'(', None), (b'a b', 'iso-8859-1'), (b')', None)])
2251
2252 def test_rfc2047_rfc2047_7(self):
2253 # 7th testcase at end of rfc2047
2254 s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)'
2255 self.assertEqual(decode_header(s),
2256 [(b'(', None), (b'a', 'iso-8859-1'), (b' b', 'iso-8859-2'),
2257 (b')', None)])
2258 self.assertEqual(make_header(decode_header(s)).encode(), s.lower())
2259 self.assertEqual(str(make_header(decode_header(s))), '(a b)')
2260
R David Murray82ffabd2012-06-03 12:27:07 -04002261 def test_multiline_header(self):
2262 s = '=?windows-1252?q?=22M=FCller_T=22?=\r\n <T.Mueller@xxx.com>'
2263 self.assertEqual(decode_header(s),
2264 [(b'"M\xfcller T"', 'windows-1252'),
2265 (b'<T.Mueller@xxx.com>', None)])
2266 self.assertEqual(make_header(decode_header(s)).encode(),
2267 ''.join(s.splitlines()))
2268 self.assertEqual(str(make_header(decode_header(s))),
2269 '"Müller T" <T.Mueller@xxx.com>')
2270
Ezio Melottib3aedd42010-11-20 19:04:17 +00002271
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002272# Test the MIMEMessage class
2273class TestMIMEMessage(TestEmailBase):
def setUp(self):
    # Keep the raw text of the msg_11.txt fixture on the instance.
    with openfile('msg_11.txt') as fixture:
        contents = fixture.read()
    self._text = contents
2277
2278 def test_type_error(self):
2279 self.assertRaises(TypeError, MIMEMessage, 'a plain string')
2280
2281 def test_valid_argument(self):
2282 eq = self.assertEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002283 subject = 'A sub-message'
2284 m = Message()
2285 m['Subject'] = subject
2286 r = MIMEMessage(m)
2287 eq(r.get_content_type(), 'message/rfc822')
2288 payload = r.get_payload()
Serhiy Storchaka328cf3c2013-11-16 12:56:23 +02002289 self.assertIsInstance(payload, list)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002290 eq(len(payload), 1)
2291 subpart = payload[0]
Serhiy Storchaka328cf3c2013-11-16 12:56:23 +02002292 self.assertIs(subpart, m)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002293 eq(subpart['subject'], subject)
2294
2295 def test_bad_multipart(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002296 msg1 = Message()
2297 msg1['Subject'] = 'subpart 1'
2298 msg2 = Message()
2299 msg2['Subject'] = 'subpart 2'
2300 r = MIMEMessage(msg1)
2301 self.assertRaises(errors.MultipartConversionError, r.attach, msg2)
2302
2303 def test_generate(self):
2304 # First craft the message to be encapsulated
2305 m = Message()
2306 m['Subject'] = 'An enclosed message'
2307 m.set_payload('Here is the body of the message.\n')
2308 r = MIMEMessage(m)
2309 r['Subject'] = 'The enclosing message'
2310 s = StringIO()
2311 g = Generator(s)
2312 g.flatten(r)
2313 self.assertEqual(s.getvalue(), """\
2314Content-Type: message/rfc822
2315MIME-Version: 1.0
2316Subject: The enclosing message
2317
2318Subject: An enclosed message
2319
2320Here is the body of the message.
2321""")
2322
def test_parse_message_rfc822(self):
    # msg_11.txt parses to a message/rfc822 container with exactly one
    # nested Message as its payload.
    outer = self._msgobj('msg_11.txt')
    self.assertEqual(outer.get_content_type(), 'message/rfc822')
    parts = outer.get_payload()
    self.assertIsInstance(parts, list)
    self.assertEqual(len(parts), 1)
    nested = parts[0]
    self.assertIsInstance(nested, Message)
    self.assertEqual(nested['subject'], 'An enclosed message')
    self.assertEqual(nested.get_payload(),
                     'Here is the body of the message.\n')
2334
# test_dsn: parse the msg_16.txt fixture and check the content type and
# payload of each of its three subparts in turn (text/plain,
# message/delivery-status, message/rfc822).
# NOTE(review): several lines inside the expected text/plain payload literal
# below begin with whitespace; the exact amount must match the msg_16.txt
# fixture — confirm against the fixture before reformatting this test.
2335 def test_dsn(self):
2336 eq = self.assertEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002337 # msg 16 is a Delivery Status Notification, see RFC 1894
2338 msg = self._msgobj('msg_16.txt')
2339 eq(msg.get_content_type(), 'multipart/report')
Serhiy Storchaka328cf3c2013-11-16 12:56:23 +02002340 self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002341 eq(len(msg.get_payload()), 3)
2342 # Subpart 1 is a text/plain, human readable section
2343 subpart = msg.get_payload(0)
2344 eq(subpart.get_content_type(), 'text/plain')
2345 eq(subpart.get_payload(), """\
2346This report relates to a message you sent with the following header fields:
2347
2348 Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
2349 Date: Sun, 23 Sep 2001 20:10:55 -0700
2350 From: "Ian T. Henry" <henryi@oxy.edu>
2351 To: SoCal Raves <scr@socal-raves.org>
2352 Subject: [scr] yeah for Ians!!
2353
2354Your message cannot be delivered to the following recipients:
2355
2356 Recipient address: jangel1@cougar.noc.ucla.edu
2357 Reason: recipient reached disk quota
2358
2359""")
2360 # Subpart 2 contains the machine parsable DSN information. It
2361 # consists of two blocks of headers, represented by two nested Message
2362 # objects.
2363 subpart = msg.get_payload(1)
2364 eq(subpart.get_content_type(), 'message/delivery-status')
2365 eq(len(subpart.get_payload()), 2)
2366 # message/delivery-status should treat each block as a bunch of
2367 # headers, i.e. a bunch of Message objects.
2368 dsn1 = subpart.get_payload(0)
Serhiy Storchaka328cf3c2013-11-16 12:56:23 +02002369 self.assertIsInstance(dsn1, Message)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002370 eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
2371 eq(dsn1.get_param('dns', header='reporting-mta'), '')
2372 # Try a missing one <wink>
2373 eq(dsn1.get_param('nsd', header='reporting-mta'), None)
2374 dsn2 = subpart.get_payload(1)
Serhiy Storchaka328cf3c2013-11-16 12:56:23 +02002375 self.assertIsInstance(dsn2, Message)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002376 eq(dsn2['action'], 'failed')
2377 eq(dsn2.get_params(header='original-recipient'),
2378 [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
2379 eq(dsn2.get_param('rfc822', header='final-recipient'), '')
2380 # Subpart 3 is the original message
2381 subpart = msg.get_payload(2)
2382 eq(subpart.get_content_type(), 'message/rfc822')
2383 payload = subpart.get_payload()
Serhiy Storchaka328cf3c2013-11-16 12:56:23 +02002384 self.assertIsInstance(payload, list)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002385 eq(len(payload), 1)
2386 subsubpart = payload[0]
Serhiy Storchaka328cf3c2013-11-16 12:56:23 +02002387 self.assertIsInstance(subsubpart, Message)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002388 eq(subsubpart.get_content_type(), 'text/plain')
2389 eq(subsubpart['message-id'],
2390 '<002001c144a6$8752e060$56104586@oxy.edu>')
2391
def test_epilogue(self):
    # A hand-built multipart with a preamble and an epilogue must flatten
    # to exactly the text stored in msg_21.txt.
    with openfile('msg_21.txt') as fp:
        expected = fp.read()
    msg = Message()
    msg['From'] = 'aperson@dom.ain'
    msg['To'] = 'bperson@dom.ain'
    msg['Subject'] = 'Test'
    msg.preamble = 'MIME message'
    msg.epilogue = 'End of MIME message\n'
    parts = [MIMEText('One'), MIMEText('Two')]
    msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
    for part in parts:
        msg.attach(part)
    out = StringIO()
    Generator(out).flatten(msg)
    self.ndiffAssertEqual(out.getvalue(), expected)
2411
def test_no_nl_preamble(self):
    # The preamble has no trailing newline and the epilogue is empty;
    # the generated text must still put the first boundary on its own
    # line and end with a newline after the closing boundary.
    msg = Message()
    msg['From'] = 'aperson@dom.ain'
    msg['To'] = 'bperson@dom.ain'
    msg['Subject'] = 'Test'
    msg.preamble = 'MIME message'
    msg.epilogue = ''
    parts = [MIMEText('One'), MIMEText('Two')]
    msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
    for part in parts:
        msg.attach(part)
    expected = """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
"""
    self.ndiffAssertEqual(msg.as_string(), expected)
2446
def test_default_type(self):
    # In msg_30.txt both top-level parts report message/rfc822 as their
    # default and actual type, and the message nested inside each one
    # reports text/plain.
    with openfile('msg_30.txt') as fp:
        msg = email.message_from_file(fp)
    containers = []
    for index in (0, 1):
        container = msg.get_payload(index)
        self.assertEqual(container.get_default_type(), 'message/rfc822')
        self.assertEqual(container.get_content_type(), 'message/rfc822')
        containers.append(container)
    for container in containers:
        nested = container.get_payload(0)
        self.assertEqual(nested.get_default_type(), 'text/plain')
        self.assertEqual(nested.get_content_type(), 'text/plain')
2463
def test_default_type_with_explicit_container_type(self):
    # Same checks as test_default_type, against the msg_28.txt fixture.
    with openfile('msg_28.txt') as fp:
        msg = email.message_from_file(fp)
    containers = []
    for index in (0, 1):
        container = msg.get_payload(index)
        self.assertEqual(container.get_default_type(), 'message/rfc822')
        self.assertEqual(container.get_content_type(), 'message/rfc822')
        containers.append(container)
    for container in containers:
        nested = container.get_payload(0)
        self.assertEqual(nested.get_default_type(), 'text/plain')
        self.assertEqual(nested.get_content_type(), 'text/plain')
2480
def test_default_type_non_parsed(self):
    # Build a multipart/digest by hand and check the types reported by
    # its message/rfc822 subparts, plus the generated text, both with
    # and without explicit Content-Type/MIME-Version headers on them.
    container = MIMEMultipart('digest', 'BOUNDARY')
    container.epilogue = ''
    inner_parts = [MIMEText('message 1\n'), MIMEText('message 2\n')]
    wrappers = [MIMEMessage(inner) for inner in inner_parts]
    for wrapper in wrappers:
        container.attach(wrapper)

    def check_wrapper_types():
        for wrapper in wrappers:
            self.assertEqual(wrapper.get_content_type(), 'message/rfc822')
            self.assertEqual(wrapper.get_default_type(), 'message/rfc822')

    check_wrapper_types()
    self.ndiffAssertEqual(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
    # With the explicit headers removed, the reported types must not
    # change, and the subpart header blocks become empty in the output.
    for wrapper in wrappers:
        del wrapper['content-type']
        del wrapper['mime-version']
    check_wrapper_types()
    self.ndiffAssertEqual(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
2554
2555 def test_mime_attachments_in_constructor(self):
2556 eq = self.assertEqual
2557 text1 = MIMEText('')
2558 text2 = MIMEText('')
2559 msg = MIMEMultipart(_subparts=(text1, text2))
2560 eq(len(msg.get_payload()), 2)
2561 eq(msg.get_payload(0), text1)
2562 eq(msg.get_payload(1), text2)
2563
Christian Heimes587c2bf2008-01-19 16:21:02 +00002564 def test_default_multipart_constructor(self):
2565 msg = MIMEMultipart()
2566 self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002567
Ezio Melottib3aedd42010-11-20 19:04:17 +00002568
# A general test of parser->model->generator idempotency.  In other words,
# read a message in, parse it into a message object tree, then, without
# touching the tree, regenerate the plain text.  The original text and the
# transformed text should be identical.  Note that we ignore the Unix-From
# line since it may contain a changed date.
class TestIdempotent(TestEmailBase):

    linesep = '\n'

    def _msgobj(self, filename):
        # Return (parsed message, original text) for the named fixture.
        with openfile(filename) as fp:
            text = fp.read()
        return email.message_from_string(text), text

    def _idempotent(self, msg, text, unixfrom=False):
        # Flatten msg without header wrapping and compare it with text.
        buffer = StringIO()
        generator = Generator(buffer, maxheaderlen=0)
        generator.flatten(msg, unixfrom=unixfrom)
        self.ndiffAssertEqual(text, buffer.getvalue())

    def _assert_roundtrip(self, filename, **flatten_options):
        # Parse the fixture and check that it regenerates unchanged.
        msg, text = self._msgobj(filename)
        self._idempotent(msg, text, **flatten_options)

    def test_parse_text_message(self):
        msg, text = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')
        self.assertEqual(msg.get_content_maintype(), 'text')
        self.assertEqual(msg.get_content_subtype(), 'plain')
        self.assertEqual(msg.get_params()[1], ('charset', 'us-ascii'))
        self.assertEqual(msg.get_param('charset'), 'us-ascii')
        self.assertEqual(msg.preamble, None)
        self.assertEqual(msg.epilogue, None)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        msg, text = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')
        self.assertEqual(msg.get_params(), None)
        self.assertEqual(msg.get_param('charset'), None)
        self._idempotent(msg, text)

    def test_simple_multipart(self):
        self._assert_roundtrip('msg_04.txt')

    def test_MIME_digest(self):
        self._assert_roundtrip('msg_02.txt')

    def test_long_header(self):
        self._assert_roundtrip('msg_27.txt')

    def test_MIME_digest_with_part_headers(self):
        self._assert_roundtrip('msg_28.txt')

    def test_mixed_with_image(self):
        self._assert_roundtrip('msg_06.txt')

    def test_multipart_report(self):
        self._assert_roundtrip('msg_05.txt')

    def test_dsn(self):
        self._assert_roundtrip('msg_16.txt')

    def test_preamble_epilogue(self):
        self._assert_roundtrip('msg_21.txt')

    def test_multipart_one_part(self):
        self._assert_roundtrip('msg_23.txt')

    def test_multipart_no_parts(self):
        self._assert_roundtrip('msg_24.txt')

    def test_no_start_boundary(self):
        self._assert_roundtrip('msg_31.txt')

    def test_rfc2231_charset(self):
        self._assert_roundtrip('msg_32.txt')

    def test_more_rfc2231_parameters(self):
        self._assert_roundtrip('msg_33.txt')

    def test_text_plain_in_a_multipart_digest(self):
        self._assert_roundtrip('msg_34.txt')

    def test_nested_multipart_mixeds(self):
        self._assert_roundtrip('msg_12a.txt')

    def test_message_external_body_idempotent(self):
        self._assert_roundtrip('msg_36.txt')

    def test_message_delivery_status(self):
        self._assert_roundtrip('msg_43.txt', unixfrom=True)

    def test_message_signed_idempotent(self):
        self._assert_roundtrip('msg_45.txt')

    def test_content_type(self):
        eq = self.assertEqual
        body = 'Yadda yadda yadda' + self.linesep
        msg, text = self._msgobj('msg_05.txt')
        eq(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(msg.get_params())
        eq(params['report-type'], 'delivery-status')
        eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep)
        eq(msg.epilogue, self.linesep)
        eq(len(msg.get_payload()), 3)
        # Make sure the subparts are what we expect
        first = msg.get_payload(0)
        eq(first.get_content_type(), 'text/plain')
        eq(first.get_payload(), body)
        second = msg.get_payload(1)
        eq(second.get_content_type(), 'text/plain')
        eq(second.get_payload(), body)
        third = msg.get_payload(2)
        eq(third.get_content_type(), 'message/rfc822')
        self.assertIsInstance(third, Message)
        nested_parts = third.get_payload()
        self.assertIsInstance(nested_parts, list)
        eq(len(nested_parts), 1)
        innermost = nested_parts[0]
        self.assertIsInstance(innermost, Message)
        eq(innermost.get_payload(), body)

    def test_parser(self):
        msg, text = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        self.assertEqual(msg.get_content_type(), 'message/rfc822')
        # The payload must be a list of exactly one sub-Message, and that
        # submessage must have a type of text/plain
        parts = msg.get_payload()
        self.assertIsInstance(parts, list)
        self.assertEqual(len(parts), 1)
        nested = parts[0]
        self.assertIsInstance(nested, Message)
        self.assertEqual(nested.get_content_type(), 'text/plain')
        self.assertIsInstance(nested.get_payload(), str)
        self.assertEqual(nested.get_payload(), self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002729
2730
Ezio Melottib3aedd42010-11-20 19:04:17 +00002731
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002732# Test various other bits of the package's functionality
2733class TestMiscellaneous(TestEmailBase):
def test_message_from_string(self):
    # Parsing msg_01.txt from a string and flattening it again must
    # reproduce the text.
    with openfile('msg_01.txt') as fp:
        original = fp.read()
    parsed = email.message_from_string(original)
    out = StringIO()
    # Don't wrap/continue long headers since we're trying to test
    # idempotency.
    Generator(out, maxheaderlen=0).flatten(parsed)
    self.assertEqual(original, out.getvalue())
2744
def test_message_from_file(self):
    # Same round trip as test_message_from_string, but parsing straight
    # from the open file after rewinding it.
    with openfile('msg_01.txt') as fp:
        original = fp.read()
        fp.seek(0)
        parsed = email.message_from_file(fp)
        out = StringIO()
        # Don't wrap/continue long headers since we're trying to test
        # idempotency.
        Generator(out, maxheaderlen=0).flatten(parsed)
        self.assertEqual(original, out.getvalue())
2756
def test_message_from_string_with_class(self):
    # A Message subclass passed to message_from_string is used for the
    # top-level message and for every subpart.
    class MyMessage(Message):
        pass

    with openfile('msg_01.txt') as fp:
        simple_text = fp.read()
    simple = email.message_from_string(simple_text, MyMessage)
    self.assertIsInstance(simple, MyMessage)
    # Try something more complicated
    with openfile('msg_02.txt') as fp:
        complex_text = fp.read()
    nested = email.message_from_string(complex_text, MyMessage)
    for part in nested.walk():
        self.assertIsInstance(part, MyMessage)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002773
def test_message_from_file_with_class(self):
    # A Message subclass passed to message_from_file is used for the
    # top-level message and for every subpart.
    class MyMessage(Message):
        pass

    with openfile('msg_01.txt') as fp:
        simple = email.message_from_file(fp, MyMessage)
    self.assertIsInstance(simple, MyMessage)
    # Try something more complicated
    with openfile('msg_02.txt') as fp:
        nested = email.message_from_file(fp, MyMessage)
    for part in nested.walk():
        self.assertIsInstance(part, MyMessage)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002787
def test_custom_message_does_not_require_arguments(self):
    # A Message subclass whose __init__ takes no arguments must still be
    # usable as the message class.
    class MyMessage(Message):
        def __init__(self):
            super().__init__()

    parsed = self._str_msg("Subject: test\n\ntest", MyMessage)
    self.assertIsInstance(parsed, MyMessage)
R David Murrayc27e5222012-05-25 15:01:48 -04002794
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002795 def test__all__(self):
2796 module = __import__('email')
R David Murray1b6c7242012-03-16 22:43:05 -04002797 self.assertEqual(sorted(module.__all__), [
2798 'base64mime', 'charset', 'encoders', 'errors', 'feedparser',
2799 'generator', 'header', 'iterators', 'message',
2800 'message_from_binary_file', 'message_from_bytes',
2801 'message_from_file', 'message_from_string', 'mime', 'parser',
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002802 'quoprimime', 'utils',
2803 ])
2804
2805 def test_formatdate(self):
2806 now = time.time()
2807 self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
2808 time.gmtime(now)[:6])
2809
2810 def test_formatdate_localtime(self):
2811 now = time.time()
2812 self.assertEqual(
2813 utils.parsedate(utils.formatdate(now, localtime=True))[:6],
2814 time.localtime(now)[:6])
2815
2816 def test_formatdate_usegmt(self):
2817 now = time.time()
2818 self.assertEqual(
2819 utils.formatdate(now, localtime=False),
2820 time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
2821 self.assertEqual(
2822 utils.formatdate(now, localtime=False, usegmt=True),
2823 time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
2824
Georg Brandl1aca31e2012-09-22 09:03:56 +02002825 # parsedate and parsedate_tz will become deprecated interfaces someday
2826 def test_parsedate_returns_None_for_invalid_strings(self):
2827 self.assertIsNone(utils.parsedate(''))
2828 self.assertIsNone(utils.parsedate_tz(''))
2829 self.assertIsNone(utils.parsedate('0'))
2830 self.assertIsNone(utils.parsedate_tz('0'))
2831 self.assertIsNone(utils.parsedate('A Complete Waste of Time'))
2832 self.assertIsNone(utils.parsedate_tz('A Complete Waste of Time'))
2833 # Not a part of the spec but, but this has historically worked:
2834 self.assertIsNone(utils.parsedate(None))
2835 self.assertIsNone(utils.parsedate_tz(None))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002836
2837 def test_parsedate_compact(self):
2838 # The FWS after the comma is optional
2839 self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
2840 utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
2841
2842 def test_parsedate_no_dayofweek(self):
2843 eq = self.assertEqual
2844 eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
2845 (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
2846
2847 def test_parsedate_compact_no_dayofweek(self):
2848 eq = self.assertEqual
2849 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
2850 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2851
R. David Murray4a62e892010-12-23 20:35:46 +00002852 def test_parsedate_no_space_before_positive_offset(self):
2853 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
2854 (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
2855
2856 def test_parsedate_no_space_before_negative_offset(self):
2857 # Issue 1155362: we already handled '+' for this case.
2858 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
2859 (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))
2860
2861
R David Murrayaccd1c02011-03-13 20:06:23 -04002862 def test_parsedate_accepts_time_with_dots(self):
2863 eq = self.assertEqual
2864 eq(utils.parsedate_tz('5 Feb 2003 13.47.26 -0800'),
2865 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2866 eq(utils.parsedate_tz('5 Feb 2003 13.47 -0800'),
2867 (2003, 2, 5, 13, 47, 0, 0, 1, -1, -28800))
2868
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002869 def test_parsedate_acceptable_to_time_functions(self):
2870 eq = self.assertEqual
2871 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
2872 t = int(time.mktime(timetup))
2873 eq(time.localtime(t)[:6], timetup[:6])
2874 eq(int(time.strftime('%Y', timetup)), 2003)
2875 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
2876 t = int(time.mktime(timetup[:9]))
2877 eq(time.localtime(t)[:6], timetup[:6])
2878 eq(int(time.strftime('%Y', timetup[:9])), 2003)
2879
Alexander Belopolskya07548e2012-06-21 20:34:09 -04002880 def test_mktime_tz(self):
2881 self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
2882 -1, -1, -1, 0)), 0)
2883 self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
2884 -1, -1, -1, 1234)), -1234)
2885
R. David Murray219d1c82010-08-25 00:45:55 +00002886 def test_parsedate_y2k(self):
2887 """Test for parsing a date with a two-digit year.
2888
2889 Parsing a date with a two-digit year should return the correct
2890 four-digit year. RFC822 allows two-digit years, but RFC2822 (which
2891 obsoletes RFC822) requires four-digit years.
2892
2893 """
2894 self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
2895 utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
2896 self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
2897 utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
2898
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002899 def test_parseaddr_empty(self):
2900 self.assertEqual(utils.parseaddr('<>'), ('', ''))
2901 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
2902
2903 def test_noquote_dump(self):
2904 self.assertEqual(
2905 utils.formataddr(('A Silly Person', 'person@dom.ain')),
2906 'A Silly Person <person@dom.ain>')
2907
2908 def test_escape_dump(self):
2909 self.assertEqual(
2910 utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
R David Murrayb53319f2012-03-14 15:31:47 -04002911 r'"A (Very) Silly Person" <person@dom.ain>')
2912 self.assertEqual(
2913 utils.parseaddr(r'"A \(Very\) Silly Person" <person@dom.ain>'),
2914 ('A (Very) Silly Person', 'person@dom.ain'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002915 a = r'A \(Special\) Person'
2916 b = 'person@dom.ain'
2917 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2918
2919 def test_escape_backslashes(self):
2920 self.assertEqual(
2921 utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
2922 r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
2923 a = r'Arthur \Backslash\ Foobar'
2924 b = 'person@dom.ain'
2925 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2926
R David Murray8debacb2011-04-06 09:35:57 -04002927 def test_quotes_unicode_names(self):
2928 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2929 name = "H\u00e4ns W\u00fcrst"
2930 addr = 'person@dom.ain'
2931 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
2932 latin1_quopri = "=?iso-8859-1?q?H=E4ns_W=FCrst?= <person@dom.ain>"
2933 self.assertEqual(utils.formataddr((name, addr)), utf8_base64)
2934 self.assertEqual(utils.formataddr((name, addr), 'iso-8859-1'),
2935 latin1_quopri)
2936
2937 def test_accepts_any_charset_like_object(self):
2938 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2939 name = "H\u00e4ns W\u00fcrst"
2940 addr = 'person@dom.ain'
2941 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
2942 foobar = "FOOBAR"
2943 class CharsetMock:
2944 def header_encode(self, string):
2945 return foobar
2946 mock = CharsetMock()
2947 mock_expected = "%s <%s>" % (foobar, addr)
2948 self.assertEqual(utils.formataddr((name, addr), mock), mock_expected)
2949 self.assertEqual(utils.formataddr((name, addr), Charset('utf-8')),
2950 utf8_base64)
2951
2952 def test_invalid_charset_like_object_raises_error(self):
2953 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2954 name = "H\u00e4ns W\u00fcrst"
2955 addr = 'person@dom.ain'
2956 # A object without a header_encode method:
2957 bad_charset = object()
2958 self.assertRaises(AttributeError, utils.formataddr, (name, addr),
2959 bad_charset)
2960
2961 def test_unicode_address_raises_error(self):
2962 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2963 addr = 'pers\u00f6n@dom.in'
2964 self.assertRaises(UnicodeError, utils.formataddr, (None, addr))
2965 self.assertRaises(UnicodeError, utils.formataddr, ("Name", addr))
2966
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002967 def test_name_with_dot(self):
2968 x = 'John X. Doe <jxd@example.com>'
2969 y = '"John X. Doe" <jxd@example.com>'
2970 a, b = ('John X. Doe', 'jxd@example.com')
2971 self.assertEqual(utils.parseaddr(x), (a, b))
2972 self.assertEqual(utils.parseaddr(y), (a, b))
2973 # formataddr() quotes the name if there's a dot in it
2974 self.assertEqual(utils.formataddr((a, b)), y)
2975
R. David Murray5397e862010-10-02 15:58:26 +00002976 def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
2977 # issue 10005. Note that in the third test the second pair of
2978 # backslashes is not actually a quoted pair because it is not inside a
2979 # comment or quoted string: the address being parsed has a quoted
2980 # string containing a quoted backslash, followed by 'example' and two
2981 # backslashes, followed by another quoted string containing a space and
2982 # the word 'example'. parseaddr copies those two backslashes
2983 # literally. Per rfc5322 this is not technically correct since a \ may
2984 # not appear in an address outside of a quoted string. It is probably
2985 # a sensible Postel interpretation, though.
2986 eq = self.assertEqual
2987 eq(utils.parseaddr('""example" example"@example.com'),
2988 ('', '""example" example"@example.com'))
2989 eq(utils.parseaddr('"\\"example\\" example"@example.com'),
2990 ('', '"\\"example\\" example"@example.com'))
2991 eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
2992 ('', '"\\\\"example\\\\" example"@example.com'))
2993
R. David Murray63563cd2010-12-18 18:25:38 +00002994 def test_parseaddr_preserves_spaces_in_local_part(self):
2995 # issue 9286. A normal RFC5322 local part should not contain any
2996 # folding white space, but legacy local parts can (they are a sequence
2997 # of atoms, not dotatoms). On the other hand we strip whitespace from
2998 # before the @ and around dots, on the assumption that the whitespace
2999 # around the punctuation is a mistake in what would otherwise be
3000 # an RFC5322 local part. Leading whitespace is, usual, stripped as well.
3001 self.assertEqual(('', "merwok wok@xample.com"),
3002 utils.parseaddr("merwok wok@xample.com"))
3003 self.assertEqual(('', "merwok wok@xample.com"),
3004 utils.parseaddr("merwok wok@xample.com"))
3005 self.assertEqual(('', "merwok wok@xample.com"),
3006 utils.parseaddr(" merwok wok @xample.com"))
3007 self.assertEqual(('', 'merwok"wok" wok@xample.com'),
3008 utils.parseaddr('merwok"wok" wok@xample.com'))
3009 self.assertEqual(('', 'merwok.wok.wok@xample.com'),
3010 utils.parseaddr('merwok. wok . wok@xample.com'))
3011
R David Murrayb53319f2012-03-14 15:31:47 -04003012 def test_formataddr_does_not_quote_parens_in_quoted_string(self):
3013 addr = ("'foo@example.com' (foo@example.com)",
3014 'foo@example.com')
3015 addrstr = ('"\'foo@example.com\' '
3016 '(foo@example.com)" <foo@example.com>')
3017 self.assertEqual(utils.parseaddr(addrstr), addr)
3018 self.assertEqual(utils.formataddr(addr), addrstr)
3019
3020
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003021 def test_multiline_from_comment(self):
3022 x = """\
3023Foo
3024\tBar <foo@example.com>"""
3025 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
3026
3027 def test_quote_dump(self):
3028 self.assertEqual(
3029 utils.formataddr(('A Silly; Person', 'person@dom.ain')),
3030 r'"A Silly; Person" <person@dom.ain>')
3031
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003032 def test_charset_richcomparisons(self):
3033 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003034 ne = self.assertNotEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003035 cset1 = Charset()
3036 cset2 = Charset()
3037 eq(cset1, 'us-ascii')
3038 eq(cset1, 'US-ASCII')
3039 eq(cset1, 'Us-AsCiI')
3040 eq('us-ascii', cset1)
3041 eq('US-ASCII', cset1)
3042 eq('Us-AsCiI', cset1)
3043 ne(cset1, 'usascii')
3044 ne(cset1, 'USASCII')
3045 ne(cset1, 'UsAsCiI')
3046 ne('usascii', cset1)
3047 ne('USASCII', cset1)
3048 ne('UsAsCiI', cset1)
3049 eq(cset1, cset2)
3050 eq(cset2, cset1)
3051
3052 def test_getaddresses(self):
3053 eq = self.assertEqual
3054 eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
3055 'Bud Person <bperson@dom.ain>']),
3056 [('Al Person', 'aperson@dom.ain'),
3057 ('Bud Person', 'bperson@dom.ain')])
3058
3059 def test_getaddresses_nasty(self):
3060 eq = self.assertEqual
3061 eq(utils.getaddresses(['foo: ;']), [('', '')])
3062 eq(utils.getaddresses(
3063 ['[]*-- =~$']),
3064 [('', ''), ('', ''), ('', '*--')])
3065 eq(utils.getaddresses(
3066 ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
3067 [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
3068
3069 def test_getaddresses_embedded_comment(self):
3070 """Test proper handling of a nested comment"""
3071 eq = self.assertEqual
3072 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
3073 eq(addrs[0][1], 'foo@bar.com')
3074
3075 def test_utils_quote_unquote(self):
3076 eq = self.assertEqual
3077 msg = Message()
3078 msg.add_header('content-disposition', 'attachment',
3079 filename='foo\\wacky"name')
3080 eq(msg.get_filename(), 'foo\\wacky"name')
3081
3082 def test_get_body_encoding_with_bogus_charset(self):
3083 charset = Charset('not a charset')
3084 self.assertEqual(charset.get_body_encoding(), 'base64')
3085
3086 def test_get_body_encoding_with_uppercase_charset(self):
3087 eq = self.assertEqual
3088 msg = Message()
3089 msg['Content-Type'] = 'text/plain; charset=UTF-8'
3090 eq(msg['content-type'], 'text/plain; charset=UTF-8')
3091 charsets = msg.get_charsets()
3092 eq(len(charsets), 1)
3093 eq(charsets[0], 'utf-8')
3094 charset = Charset(charsets[0])
3095 eq(charset.get_body_encoding(), 'base64')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00003096 msg.set_payload(b'hello world', charset=charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003097 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
3098 eq(msg.get_payload(decode=True), b'hello world')
3099 eq(msg['content-transfer-encoding'], 'base64')
3100 # Try another one
3101 msg = Message()
3102 msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
3103 charsets = msg.get_charsets()
3104 eq(len(charsets), 1)
3105 eq(charsets[0], 'us-ascii')
3106 charset = Charset(charsets[0])
3107 eq(charset.get_body_encoding(), encoders.encode_7or8bit)
3108 msg.set_payload('hello world', charset=charset)
3109 eq(msg.get_payload(), 'hello world')
3110 eq(msg['content-transfer-encoding'], '7bit')
3111
3112 def test_charsets_case_insensitive(self):
3113 lc = Charset('us-ascii')
3114 uc = Charset('US-ASCII')
3115 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
3116
3117 def test_partial_falls_inside_message_delivery_status(self):
3118 eq = self.ndiffAssertEqual
3119 # The Parser interface provides chunks of data to FeedParser in 8192
3120 # byte gulps. SF bug #1076485 found one of those chunks inside
3121 # message/delivery-status header block, which triggered an
3122 # unreadline() of NeedMoreData.
3123 msg = self._msgobj('msg_43.txt')
3124 sfp = StringIO()
3125 iterators._structure(msg, sfp)
3126 eq(sfp.getvalue(), """\
3127multipart/report
3128 text/plain
3129 message/delivery-status
3130 text/plain
3131 text/plain
3132 text/plain
3133 text/plain
3134 text/plain
3135 text/plain
3136 text/plain
3137 text/plain
3138 text/plain
3139 text/plain
3140 text/plain
3141 text/plain
3142 text/plain
3143 text/plain
3144 text/plain
3145 text/plain
3146 text/plain
3147 text/plain
3148 text/plain
3149 text/plain
3150 text/plain
3151 text/plain
3152 text/plain
3153 text/plain
3154 text/plain
3155 text/plain
3156 text/rfc822-headers
3157""")
3158
R. David Murraya0b44b52010-12-02 21:47:19 +00003159 def test_make_msgid_domain(self):
3160 self.assertEqual(
3161 email.utils.make_msgid(domain='testdomain-string')[-19:],
3162 '@testdomain-string>')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003163
R David Murraye67c6c52013-03-07 16:38:03 -05003164 def test_Generator_linend(self):
3165 # Issue 14645.
3166 with openfile('msg_26.txt', newline='\n') as f:
3167 msgtxt = f.read()
3168 msgtxt_nl = msgtxt.replace('\r\n', '\n')
3169 msg = email.message_from_string(msgtxt)
3170 s = StringIO()
3171 g = email.generator.Generator(s)
3172 g.flatten(msg)
3173 self.assertEqual(s.getvalue(), msgtxt_nl)
3174
3175 def test_BytesGenerator_linend(self):
3176 # Issue 14645.
3177 with openfile('msg_26.txt', newline='\n') as f:
3178 msgtxt = f.read()
3179 msgtxt_nl = msgtxt.replace('\r\n', '\n')
3180 msg = email.message_from_string(msgtxt_nl)
3181 s = BytesIO()
3182 g = email.generator.BytesGenerator(s)
3183 g.flatten(msg, linesep='\r\n')
3184 self.assertEqual(s.getvalue().decode('ascii'), msgtxt)
3185
3186 def test_BytesGenerator_linend_with_non_ascii(self):
3187 # Issue 14645.
3188 with openfile('msg_26.txt', 'rb') as f:
3189 msgtxt = f.read()
3190 msgtxt = msgtxt.replace(b'with attachment', b'fo\xf6')
3191 msgtxt_nl = msgtxt.replace(b'\r\n', b'\n')
3192 msg = email.message_from_bytes(msgtxt_nl)
3193 s = BytesIO()
3194 g = email.generator.BytesGenerator(s)
3195 g.flatten(msg, linesep='\r\n')
3196 self.assertEqual(s.getvalue(), msgtxt)
3197
Ezio Melottib3aedd42010-11-20 19:04:17 +00003198
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003199# Test the iterator/generators
class TestIterators(TestEmailBase):
    # Covers the email.iterators helpers and BufferedSubFile line splitting.

    def test_body_line_iterator(self):
        # First a simple non-multipart message
        simple = self._msgobj('msg_01.txt')
        body_lines = list(iterators.body_line_iterator(simple))
        self.assertEqual(len(body_lines), 6)
        self.ndiffAssertEqual(EMPTYSTRING.join(body_lines),
                              simple.get_payload())
        # Now a more complicated multipart
        multi = self._msgobj('msg_02.txt')
        body_lines = list(iterators.body_line_iterator(multi))
        self.assertEqual(len(body_lines), 43)
        with openfile('msg_19.txt') as fp:
            self.ndiffAssertEqual(EMPTYSTRING.join(body_lines), fp.read())

    def test_typed_subpart_iterator(self):
        # Only a main type is given; two subparts are expected to match.
        msg = self._msgobj('msg_04.txt')
        payloads = [
            part.get_payload()
            for part in iterators.typed_subpart_iterator(msg, 'text')
        ]
        self.assertEqual(len(payloads), 2)
        self.assertEqual(EMPTYSTRING.join(payloads), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        # Main type and subtype are both given; one subpart is expected.
        msg = self._msgobj('msg_03.txt')
        payloads = [
            part.get_payload()
            for part in iterators.typed_subpart_iterator(msg, 'text', 'plain')
        ]
        self.assertEqual(len(payloads), 1)
        self.assertEqual(EMPTYSTRING.join(payloads), """\

Hi,

Do you like this message?

-Me
""")

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
           line endings.  A CR ending one push() followed by a LF starting
           the next push() added an empty line.
        '''
        from email.feedparser import BufferedSubFile, NeedMoreData
        # Each entry: (chunk to push, number of complete lines readable
        # immediately after that push).
        chunks = [
            ("a\r \n", 2),
            ("b", 0),
            ("c\n", 1),
            ("", 0),
            ("d\r\n", 1),
            ("e\r", 0),
            ("\nf", 1),
            ("\r\n", 1),
        ]
        bsf = BufferedSubFile()
        collected = []
        expected_total = 0
        for chunk, expected_now in chunks:
            bsf.push(chunk)
            expected_total += expected_now
            # Drain every line available so far; readline() hands back
            # NeedMoreData once nothing complete is left.
            ready = list(iter(bsf.readline, NeedMoreData))
            collected.extend(ready)
            self.assertEqual(expected_now, len(ready))
        self.assertEqual(len(collected), expected_total)
        # Nothing pushed may be lost or duplicated.
        self.assertEqual(''.join([chunk for chunk, _ in chunks]),
                         ''.join(collected))
R. David Murray45bf773f2010-07-17 01:19:57 +00003286
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003287
Ezio Melottib3aedd42010-11-20 19:04:17 +00003288
class TestParsers(TestEmailBase):
    # Parser, HeaderParser and message_from_*() behaviour on assorted input.

    def test_header_parser(self):
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt') as fp:
            parsed = HeaderParser().parse(fp)
        self.assertEqual(parsed['from'], 'ppp-request@zzz.org')
        self.assertEqual(parsed['to'], 'ppp@zzz.org')
        self.assertEqual(parsed.get_content_type(), 'multipart/mixed')
        # The body is not split into parts: it stays one string.
        self.assertFalse(parsed.is_multipart())
        self.assertIsInstance(parsed.get_payload(), str)

    def test_bytes_header_parser(self):
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt', 'rb') as fp:
            parsed = email.parser.BytesHeaderParser().parse(fp)
        self.assertEqual(parsed['from'], 'ppp-request@zzz.org')
        self.assertEqual(parsed['to'], 'ppp@zzz.org')
        self.assertEqual(parsed.get_content_type(), 'multipart/mixed')
        self.assertFalse(parsed.is_multipart())
        # Text by default, bytes when decoding is requested.
        self.assertIsInstance(parsed.get_payload(), str)
        self.assertIsInstance(parsed.get_payload(decode=True), bytes)

    def test_whitespace_continuation(self):
        # This message contains a line after the Subject: header that has only
        # whitespace, but it is not empty!
        parsed = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: the next line has a space on it
\x20
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam

Here's the message body
""")
        self.assertEqual(parsed['subject'],
                         'the next line has a space on it\n ')
        self.assertEqual(parsed['message-id'], 'spam')
        self.assertEqual(parsed.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        # Like the previous test, but the subject line is the last
        # header.
        parsed = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam
Subject: the next line has a space on it
\x20

Here's the message body
""")
        self.assertEqual(parsed['subject'],
                         'the next line has a space on it\n ')
        self.assertEqual(parsed['message-id'], 'spam')
        self.assertEqual(parsed.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        # newline='\n' hands the file's \r\n endings to the parser as-is.
        with openfile('msg_26.txt', newline='\n') as fp:
            parsed = Parser().parse(fp)
        self.assertEqual(len(parsed.get_payload()), 2)
        text_part = parsed.get_payload(0)
        self.assertEqual(text_part.get_content_type(), 'text/plain')
        self.assertEqual(text_part.get_payload(),
                         'Simple email with attachment.\r\n\r\n')
        attachment = parsed.get_payload(1)
        self.assertEqual(attachment.get_content_type(), 'application/riscos')

    def test_crlf_flatten(self):
        # Using newline='\n' preserves the crlfs in this input file.
        with openfile('msg_26.txt', newline='\n') as fp:
            original = fp.read()
        parsed = email.message_from_string(original)
        out = StringIO()
        Generator(out).flatten(parsed, linesep='\r\n')
        self.assertEqual(out.getvalue(), original)

    maxDiff = None

    def test_multipart_digest_with_extra_mime_headers(self):
        check = self.assertEqual
        with openfile('msg_28.txt') as fp:
            digest = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        check(digest.is_multipart(), 1)
        check(len(digest.get_payload()), 2)
        # Both enclosed messages have the same shape; only the body differs.
        for index, body in ((0, 'message 1\n'), (1, 'message 2\n')):
            wrapper = digest.get_payload(index)
            check(wrapper.get_content_type(), 'message/rfc822')
            check(wrapper.is_multipart(), 1)
            check(len(wrapper.get_payload()), 1)
            inner = wrapper.get_payload(0)
            check(inner.is_multipart(), 0)
            check(inner.get_content_type(), 'text/plain')
            self.ndiffAssertEqual(inner.get_payload(), body)

    def test_three_lines(self):
        # A bug report by Andrew McNamara
        header_lines = ['From: Andrew Person <aperson@dom.ain',
                        'Subject: Test',
                        'Date: Tue, 20 Aug 2002 16:43:45 +1000']
        parsed = email.message_from_string(NL.join(header_lines))
        self.assertEqual(parsed['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        first = 'text'
        second = 'more text'
        raw = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            first, second)
        parsed = email.message_from_string(raw)
        self.assertEqual(parsed.get('Header'), first)
        self.assertEqual(parsed.get('Next-Header'), second)

    def test_rfc2822_header_syntax(self):
        # Header names made of unusual characters are accepted.
        raw = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        parsed = email.message_from_string(raw)
        self.assertEqual(len(parsed), 3)
        names = sorted(field for field in parsed)
        self.assertEqual(names, ['!"#QUX;~', '>From', 'From'])
        self.assertEqual(parsed.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        # The first line has a space before any colon, so no headers at
        # all are expected from this input.
        raw = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        parsed = email.message_from_string(raw)
        self.assertEqual(len(parsed.keys()), 0)

    def test_rfc2822_one_character_header(self):
        # Single-character header names are accepted.
        raw = 'A: first header\nB: second header\nCC: third header\n\nbody'
        parsed = email.message_from_string(raw)
        names = sorted(parsed.keys())
        self.assertEqual(names, ['A', 'B', 'CC'])
        self.assertEqual(parsed.get_payload(), 'body')

    def test_CRLFLF_at_end_of_part(self):
        # issue 5610: feedparser should not eat two chars from body part ending
        # with "\r\n\n".
        raw_lines = (
            "From: foo@bar.com\n",
            "To: baz\n",
            "Mime-Version: 1.0\n",
            "Content-Type: multipart/mixed; boundary=BOUNDARY\n",
            "\n",
            "--BOUNDARY\n",
            "Content-Type: text/plain\n",
            "\n",
            "body ending with CRLF newline\r\n",
            "\n",
            "--BOUNDARY--\n",
        )
        parsed = email.message_from_string(''.join(raw_lines))
        body = parsed.get_payload(0).get_payload()
        self.assertTrue(body.endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003464
Ezio Melottib3aedd42010-11-20 19:04:17 +00003465
R. David Murray96fd54e2010-10-08 15:55:28 +00003466class Test8BitBytesHandling(unittest.TestCase):
3467 # In Python3 all input is string, but that doesn't work if the actual input
3468 # uses an 8bit transfer encoding. To hack around that, in email 5.1 we
3469 # decode byte streams using the surrogateescape error handler, and
3470 # reconvert to binary at appropriate places if we detect surrogates. This
3471 # doesn't allow us to transform headers with 8bit bytes (they get munged),
3472 # but it does allow us to parse and preserve them, and to decode body
3473 # parts that use an 8bit CTE.
3474
    # Template for a small single-part text message.  The body tests fill
    # in {charset}, {cte} and {bodyline} with str.format() and encode the
    # result to bytes before parsing it.
    bodytest_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: baz
        Mime-Version: 1.0
        Content-Type: text/plain; charset={charset}
        Content-Transfer-Encoding: {cte}

        {bodyline}
        """)
3484
3485 def test_known_8bit_CTE(self):
3486 m = self.bodytest_msg.format(charset='utf-8',
3487 cte='8bit',
3488 bodyline='pöstal').encode('utf-8')
3489 msg = email.message_from_bytes(m)
3490 self.assertEqual(msg.get_payload(), "pöstal\n")
3491 self.assertEqual(msg.get_payload(decode=True),
3492 "pöstal\n".encode('utf-8'))
3493
3494 def test_unknown_8bit_CTE(self):
3495 m = self.bodytest_msg.format(charset='notavalidcharset',
3496 cte='8bit',
3497 bodyline='pöstal').encode('utf-8')
3498 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00003499 self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n")
R. David Murray96fd54e2010-10-08 15:55:28 +00003500 self.assertEqual(msg.get_payload(decode=True),
3501 "pöstal\n".encode('utf-8'))
3502
3503 def test_8bit_in_quopri_body(self):
3504 # This is non-RFC compliant data...without 'decode' the library code
3505 # decodes the body using the charset from the headers, and because the
3506 # source byte really is utf-8 this works. This is likely to fail
3507 # against real dirty data (ie: produce mojibake), but the data is
3508 # invalid anyway so it is as good a guess as any. But this means that
3509 # this test just confirms the current behavior; that behavior is not
3510 # necessarily the best possible behavior. With 'decode' it is
3511 # returning the raw bytes, so that test should be of correct behavior,
3512 # or at least produce the same result that email4 did.
3513 m = self.bodytest_msg.format(charset='utf-8',
3514 cte='quoted-printable',
3515 bodyline='p=C3=B6stál').encode('utf-8')
3516 msg = email.message_from_bytes(m)
3517 self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
3518 self.assertEqual(msg.get_payload(decode=True),
3519 'pöstál\n'.encode('utf-8'))
3520
3521 def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
3522 # This is similar to the previous test, but proves that if the 8bit
3523 # byte is undecodeable in the specified charset, it gets replaced
3524 # by the unicode 'unknown' character. Again, this may or may not
3525 # be the ideal behavior. Note that if decode=False none of the
3526 # decoders will get involved, so this is the only test we need
3527 # for this behavior.
3528 m = self.bodytest_msg.format(charset='ascii',
3529 cte='quoted-printable',
3530 bodyline='p=C3=B6stál').encode('utf-8')
3531 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00003532 self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
R. David Murray96fd54e2010-10-08 15:55:28 +00003533 self.assertEqual(msg.get_payload(decode=True),
3534 'pöstál\n'.encode('utf-8'))
3535
R David Murray80e0aee2012-05-27 21:23:34 -04003536 # test_defect_handling:test_invalid_chars_in_base64_payload
R. David Murray96fd54e2010-10-08 15:55:28 +00003537 def test_8bit_in_base64_body(self):
R David Murray80e0aee2012-05-27 21:23:34 -04003538 # If we get 8bit bytes in a base64 body, we can just ignore them
3539 # as being outside the base64 alphabet and decode anyway. But
3540 # we register a defect.
R. David Murray96fd54e2010-10-08 15:55:28 +00003541 m = self.bodytest_msg.format(charset='utf-8',
3542 cte='base64',
3543 bodyline='cMO2c3RhbAá=').encode('utf-8')
3544 msg = email.message_from_bytes(m)
3545 self.assertEqual(msg.get_payload(decode=True),
R David Murray80e0aee2012-05-27 21:23:34 -04003546 'pöstal'.encode('utf-8'))
3547 self.assertIsInstance(msg.defects[0],
3548 errors.InvalidBase64CharactersDefect)
R. David Murray96fd54e2010-10-08 15:55:28 +00003549
3550 def test_8bit_in_uuencode_body(self):
3551 # Sticking an 8bit byte in a uuencode block makes it undecodable by
3552 # normal means, so the block is returned undecoded, but as bytes.
3553 m = self.bodytest_msg.format(charset='utf-8',
3554 cte='uuencode',
3555 bodyline='<,.V<W1A; á ').encode('utf-8')
3556 msg = email.message_from_bytes(m)
3557 self.assertEqual(msg.get_payload(decode=True),
3558 '<,.V<W1A; á \n'.encode('utf-8'))
3559
3560
    # Each entry pairs a raw header line with (header name, the encoded
    # value the string form of the parsed message is expected to show).
    # 'From' appears twice so that get_all('from') has two values to return.
    headertest_headers = (
        ('From: foo@bar.com', ('From', 'foo@bar.com')),
        ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
        ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
            '\tJean de Baddie',
            ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
                        'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
                        ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
        ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
        )
    # The raw header lines joined by newlines, followed by one line of body
    # text (note: no blank separator line), encoded to utf-8 bytes.
    headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) +
        '\nYes, they are flying.\n').encode('utf-8')
R. David Murray96fd54e2010-10-08 15:55:28 +00003573
3574 def test_get_8bit_header(self):
3575 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003576 self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz')
3577 self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz')
R. David Murray96fd54e2010-10-08 15:55:28 +00003578
3579 def test_print_8bit_headers(self):
3580 msg = email.message_from_bytes(self.headertest_msg)
3581 self.assertEqual(str(msg),
R. David Murray92532142011-01-07 23:25:30 +00003582 textwrap.dedent("""\
3583 From: {}
3584 To: {}
3585 Subject: {}
3586 From: {}
3587
3588 Yes, they are flying.
3589 """).format(*[expected[1] for (_, expected) in
3590 self.headertest_headers]))
R. David Murray96fd54e2010-10-08 15:55:28 +00003591
3592 def test_values_with_8bit_headers(self):
3593 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003594 self.assertListEqual([str(x) for x in msg.values()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003595 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003596 'b\uFFFD\uFFFDz',
3597 'Maintenant je vous pr\uFFFD\uFFFDsente mon '
3598 'coll\uFFFD\uFFFDgue, le pouf '
3599 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
R. David Murray96fd54e2010-10-08 15:55:28 +00003600 '\tJean de Baddie',
R. David Murray92532142011-01-07 23:25:30 +00003601 "g\uFFFD\uFFFDst"])
R. David Murray96fd54e2010-10-08 15:55:28 +00003602
3603 def test_items_with_8bit_headers(self):
3604 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003605 self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003606 [('From', 'foo@bar.com'),
R. David Murray92532142011-01-07 23:25:30 +00003607 ('To', 'b\uFFFD\uFFFDz'),
3608 ('Subject', 'Maintenant je vous '
3609 'pr\uFFFD\uFFFDsente '
3610 'mon coll\uFFFD\uFFFDgue, le pouf '
3611 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
3612 '\tJean de Baddie'),
3613 ('From', 'g\uFFFD\uFFFDst')])
R. David Murray96fd54e2010-10-08 15:55:28 +00003614
3615 def test_get_all_with_8bit_headers(self):
3616 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003617 self.assertListEqual([str(x) for x in msg.get_all('from')],
R. David Murray96fd54e2010-10-08 15:55:28 +00003618 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003619 'g\uFFFD\uFFFDst'])
R. David Murray96fd54e2010-10-08 15:55:28 +00003620
R David Murraya2150232011-03-16 21:11:23 -04003621 def test_get_content_type_with_8bit(self):
3622 msg = email.message_from_bytes(textwrap.dedent("""\
3623 Content-Type: text/pl\xA7in; charset=utf-8
3624 """).encode('latin-1'))
3625 self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin")
3626 self.assertEqual(msg.get_content_maintype(), "text")
3627 self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin")
3628
R David Murray97f43c02012-06-24 05:03:27 -04003629 # test_headerregistry.TestContentTypeHeader.non_ascii_in_params
R David Murraya2150232011-03-16 21:11:23 -04003630 def test_get_params_with_8bit(self):
3631 msg = email.message_from_bytes(
3632 'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1'))
3633 self.assertEqual(msg.get_params(header='x-header'),
3634 [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])
3635 self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne')
3636 # XXX: someday you might be able to get 'b\xa7r', for now you can't.
3637 self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None)
3638
R David Murray97f43c02012-06-24 05:03:27 -04003639 # test_headerregistry.TestContentTypeHeader.non_ascii_in_rfc2231_value
R David Murraya2150232011-03-16 21:11:23 -04003640 def test_get_rfc2231_params_with_8bit(self):
3641 msg = email.message_from_bytes(textwrap.dedent("""\
3642 Content-Type: text/plain; charset=us-ascii;
3643 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3644 ).encode('latin-1'))
3645 self.assertEqual(msg.get_param('title'),
3646 ('us-ascii', 'en', 'This is not f\uFFFDn'))
3647
3648 def test_set_rfc2231_params_with_8bit(self):
3649 msg = email.message_from_bytes(textwrap.dedent("""\
3650 Content-Type: text/plain; charset=us-ascii;
3651 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3652 ).encode('latin-1'))
3653 msg.set_param('title', 'test')
3654 self.assertEqual(msg.get_param('title'), 'test')
3655
3656 def test_del_rfc2231_params_with_8bit(self):
3657 msg = email.message_from_bytes(textwrap.dedent("""\
3658 Content-Type: text/plain; charset=us-ascii;
3659 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3660 ).encode('latin-1'))
3661 msg.del_param('title')
3662 self.assertEqual(msg.get_param('title'), None)
3663 self.assertEqual(msg.get_content_maintype(), 'text')
3664
3665 def test_get_payload_with_8bit_cte_header(self):
3666 msg = email.message_from_bytes(textwrap.dedent("""\
3667 Content-Transfer-Encoding: b\xa7se64
3668 Content-Type: text/plain; charset=latin-1
3669
3670 payload
3671 """).encode('latin-1'))
3672 self.assertEqual(msg.get_payload(), 'payload\n')
3673 self.assertEqual(msg.get_payload(decode=True), b'payload\n')
3674
    # utf-8 encoded message with non-ASCII text in the To and Subject
    # headers and a Cyrillic body declared as 8bit.  The Subject header is
    # folded onto a second line that starts with a tab.
    non_latin_bin_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: báz
        Subject: Maintenant je vous présente mon collègue, le pouf célèbre
        \tJean de Baddie
        Mime-Version: 1.0
        Content-Type: text/plain; charset="utf-8"
        Content-Transfer-Encoding: 8bit

        Да, они летят.
        """).encode('utf-8')
3686
3687 def test_bytes_generator(self):
3688 msg = email.message_from_bytes(self.non_latin_bin_msg)
3689 out = BytesIO()
3690 email.generator.BytesGenerator(out).flatten(msg)
3691 self.assertEqual(out.getvalue(), self.non_latin_bin_msg)
3692
R. David Murray7372a072011-01-26 21:21:32 +00003693 def test_bytes_generator_handles_None_body(self):
3694 #Issue 11019
3695 msg = email.message.Message()
3696 out = BytesIO()
3697 email.generator.BytesGenerator(out).flatten(msg)
3698 self.assertEqual(out.getvalue(), b"\n")
3699
R. David Murray92532142011-01-07 23:25:30 +00003700 non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\
R. David Murray96fd54e2010-10-08 15:55:28 +00003701 From: foo@bar.com
R. David Murray92532142011-01-07 23:25:30 +00003702 To: =?unknown-8bit?q?b=C3=A1z?=
3703 Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=
3704 =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=
3705 =?unknown-8bit?q?_Jean_de_Baddie?=
R. David Murray96fd54e2010-10-08 15:55:28 +00003706 Mime-Version: 1.0
3707 Content-Type: text/plain; charset="utf-8"
3708 Content-Transfer-Encoding: base64
3709
3710 0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
3711 """)
3712
3713 def test_generator_handles_8bit(self):
3714 msg = email.message_from_bytes(self.non_latin_bin_msg)
3715 out = StringIO()
3716 email.generator.Generator(out).flatten(msg)
R. David Murray92532142011-01-07 23:25:30 +00003717 self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped)
R. David Murray96fd54e2010-10-08 15:55:28 +00003718
3719 def test_bytes_generator_with_unix_from(self):
3720 # The unixfrom contains a current date, so we can't check it
3721 # literally. Just make sure the first word is 'From' and the
3722 # rest of the message matches the input.
3723 msg = email.message_from_bytes(self.non_latin_bin_msg)
3724 out = BytesIO()
3725 email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
3726 lines = out.getvalue().split(b'\n')
3727 self.assertEqual(lines[0].split()[0], b'From')
3728 self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)
3729
    # Variant of non_latin_bin_msg_as7bit_wrapped: list items 2 and 3 (the
    # Subject line and its first continuation line) are replaced by a single
    # long Subject line before the lines are joined back together.
    non_latin_bin_msg_as7bit = non_latin_bin_msg_as7bit_wrapped.split('\n')
    non_latin_bin_msg_as7bit[2:4] = [
        'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
        'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
    non_latin_bin_msg_as7bit = '\n'.join(non_latin_bin_msg_as7bit)
3735
def test_message_from_binary_file(self):
    # Write the raw bytes to a scratch file (removed again on cleanup),
    # parse it back through BytesParser and compare the str() rendering.
    path = 'test.msg'
    self.addCleanup(unlink, path)
    with open(path, 'wb') as sink:
        sink.write(self.non_latin_bin_msg)
    with open(path, 'rb') as source:
        parsed = email.parser.BytesParser().parse(source)
    self.assertEqual(str(parsed), self.non_latin_bin_msg_as7bit)
3744
# Raw message bytes: headers declare a latin-1, 8bit body, and the whole
# text is encoded to latin-1 so the accented characters are single bytes.
latin_bin_msg = textwrap.dedent("""\
    From: foo@bar.com
    To: Dinsdale
    Subject: Nudge nudge, wink, wink
    Mime-Version: 1.0
    Content-Type: text/plain; charset="latin-1"
    Content-Transfer-Encoding: 8bit

    oh là là, know what I mean, know what I mean?
    """).encode('latin-1')

# Text the tests below expect from str() of the parsed latin_bin_msg: the
# charset is spelled iso-8859-1 and the body is quoted-printable.
latin_bin_msg_as7bit = textwrap.dedent("""\
    From: foo@bar.com
    To: Dinsdale
    Subject: Nudge nudge, wink, wink
    Mime-Version: 1.0
    Content-Type: text/plain; charset="iso-8859-1"
    Content-Transfer-Encoding: quoted-printable

    oh l=E0 l=E0, know what I mean, know what I mean?
    """)
3766
3767 def test_string_generator_reencodes_to_quopri_when_appropriate(self):
3768 m = email.message_from_bytes(self.latin_bin_msg)
3769 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3770
3771 def test_decoded_generator_emits_unicode_body(self):
3772 m = email.message_from_bytes(self.latin_bin_msg)
3773 out = StringIO()
3774 email.generator.DecodedGenerator(out).flatten(m)
3775 #DecodedHeader output contains an extra blank line compared
3776 #to the input message. RDM: not sure if this is a bug or not,
3777 #but it is not specific to the 8bit->7bit conversion.
3778 self.assertEqual(out.getvalue(),
3779 self.latin_bin_msg.decode('latin-1')+'\n')
3780
3781 def test_bytes_feedparser(self):
3782 bfp = email.feedparser.BytesFeedParser()
3783 for i in range(0, len(self.latin_bin_msg), 10):
3784 bfp.feed(self.latin_bin_msg[i:i+10])
3785 m = bfp.close()
3786 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3787
def test_crlf_flatten(self):
    # Parsing msg_26.txt and flattening it with CRLF line endings must
    # give back the file's bytes.
    with openfile('msg_26.txt', 'rb') as fp:
        raw = fp.read()
    parsed = email.message_from_bytes(raw)
    sink = BytesIO()
    email.generator.BytesGenerator(sink).flatten(parsed, linesep='\r\n')
    self.assertEqual(sink.getvalue(), raw)
R David Murrayc5c14722011-04-06 08:13:02 -04003796
def test_8bit_multipart(self):
    # Issue 11605
    # A multipart/alternative message whose two parts are both declared
    # 8bit utf-8 must survive a parse + BytesGenerator.flatten round trip
    # byte for byte.  The boundary parameter sits on a folded continuation
    # line, which therefore has to start with whitespace.
    source = textwrap.dedent("""\
        Date: Fri, 18 Mar 2011 17:15:43 +0100
        To: foo@example.com
        From: foodwatch-Newsletter <bar@example.com>
        Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System
        Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain>
        MIME-Version: 1.0
        Content-Type: multipart/alternative;
                boundary="b1_76a486bee62b0d200f33dc2ca08220ad"

        --b1_76a486bee62b0d200f33dc2ca08220ad
        Content-Type: text/plain; charset="utf-8"
        Content-Transfer-Encoding: 8bit

        Guten Tag, ,

        mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die
        Nachrichten aus Japan.


        --b1_76a486bee62b0d200f33dc2ca08220ad
        Content-Type: text/html; charset="utf-8"
        Content-Transfer-Encoding: 8bit

        <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
        "http://www.w3.org/TR/html4/loose.dtd">
        <html lang="de">
        <head>
        <title>foodwatch - Newsletter</title>
        </head>
        <body>
        <p>mit gro&szlig;er Betroffenheit verfolgen auch wir im foodwatch-Team
        die Nachrichten aus Japan.</p>
        </body>
        </html>
        --b1_76a486bee62b0d200f33dc2ca08220ad--

        """).encode('utf-8')
    msg = email.message_from_bytes(source)
    s = BytesIO()
    g = email.generator.BytesGenerator(s)
    g.flatten(msg)
    self.assertEqual(s.getvalue(), source)
3842
R David Murray9fd170e2012-03-14 14:05:03 -04003843 def test_bytes_generator_b_encoding_linesep(self):
3844 # Issue 14062: b encoding was tacking on an extra \n.
3845 m = Message()
3846 # This has enough non-ascii that it should always end up b encoded.
3847 m['Subject'] = Header('žluťoučký kůň')
3848 s = BytesIO()
3849 g = email.generator.BytesGenerator(s)
3850 g.flatten(m, linesep='\r\n')
3851 self.assertEqual(
3852 s.getvalue(),
3853 b'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
3854
3855 def test_generator_b_encoding_linesep(self):
3856 # Since this broke in ByteGenerator, test Generator for completeness.
3857 m = Message()
3858 # This has enough non-ascii that it should always end up b encoded.
3859 m['Subject'] = Header('žluťoučký kůň')
3860 s = StringIO()
3861 g = email.generator.Generator(s)
3862 g.flatten(m, linesep='\r\n')
3863 self.assertEqual(
3864 s.getvalue(),
3865 'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
3866
# unittest setting: None removes the length limit on failure diffs.
maxDiff = None
3868
Ezio Melottib3aedd42010-11-20 19:04:17 +00003869
class BaseTestBytesGeneratorIdempotent:
    # Mixin supplying bytes-based _msgobj() and _idempotent() helpers.
    # Users of the mixin must define linesep, blinesep and
    # normalize_linesep_regex.

    maxDiff = None

    def _msgobj(self, filename):
        # Read the data file as bytes, rewrite its line endings to
        # self.blinesep, and return (parsed message, normalized bytes).
        with openfile(filename, 'rb') as fp:
            raw = fp.read()
        raw = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(raw), raw

    def _idempotent(self, msg, data, unixfrom=False):
        # Flatten msg without header re-wrapping (maxheaderlen=0) using
        # self.linesep and require the output to equal data.
        sink = BytesIO()
        generator = email.generator.BytesGenerator(sink, maxheaderlen=0)
        generator.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertEqual(data, sink.getvalue())
R. David Murray96fd54e2010-10-08 15:55:28 +00003886
3887
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                     TestIdempotent):
    # Configuration for the bytes mixin: LF line endings.
    linesep = '\n'
    blinesep = b'\n'
    # Matches every CRLF pair so that it can be rewritten to blinesep.
    normalize_linesep_regex = re.compile(br'\r\n')
3893
3894
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    # Configuration for the bytes mixin: CRLF line endings.
    linesep = '\r\n'
    blinesep = b'\r\n'
    # Matches only an LF that is not already preceded by CR, so existing
    # CRLF pairs are left alone when line endings are rewritten.
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
3900
Ezio Melottib3aedd42010-11-20 19:04:17 +00003901
class TestBase64(unittest.TestCase):
    # Tests for the helpers in email.base64mime.

    def test_len(self):
        eq = self.assertEqual
        eq(base64mime.header_length('hello'),
           len(base64mime.body_encode(b'hello', eol='')))
        # base64 emits four characters for every (partial) group of three
        # input characters.
        for size in range(15):
            if size == 0:
                bsize = 0
            elif size <= 3:
                bsize = 4
            elif size <= 6:
                bsize = 8
            elif size <= 9:
                bsize = 12
            elif size <= 12:
                bsize = 16
            else:
                bsize = 20
            eq(base64mime.header_length('x' * size), bsize)

    def test_decode(self):
        eq = self.assertEqual
        eq(base64mime.decode(''), b'')
        eq(base64mime.decode('aGVsbG8='), b'hello')

    def test_encode(self):
        eq = self.assertEqual
        eq(base64mime.body_encode(b''), b'')
        eq(base64mime.body_encode(b'hello'), 'aGVsbG8=\n')
        # Test the binary flag
        eq(base64mime.body_encode(b'hello\n'), 'aGVsbG8K\n')
        # Test the maxlinelen arg
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40), """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IA==
""")
        # Test the eol argument
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IA==\r
""")

    def test_header_encode(self):
        eq = self.assertEqual
        he = base64mime.header_encode
        eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        eq(he('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # Test the charset option
        eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=')
        # This used to repeat the default-charset assertion above; pass the
        # charset so the option is also covered for a multi-line value.
        eq(he('hello\nworld', charset='iso-8859-2'),
           '=?iso-8859-2?b?aGVsbG8Kd29ybGQ=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003952
3953
Ezio Melottib3aedd42010-11-20 19:04:17 +00003954
class TestQuopri(unittest.TestCase):
    # Tests for the quoted-printable helpers in email.quoprimime.

    def setUp(self):
        # Set of characters (as byte integers) that don't need to be encoded
        # in headers.
        self.hlit = list(chain(
            range(ord('a'), ord('z') + 1),
            range(ord('A'), ord('Z') + 1),
            range(ord('0'), ord('9') + 1),
            (c for c in b'!*+-/')))
        # Set of characters (as byte integers) that do need to be encoded in
        # headers.
        self.hnon = [c for c in range(256) if c not in self.hlit]
        # Use a real assertion: a bare assert statement vanishes under -O.
        self.assertEqual(len(self.hlit) + len(self.hnon), 256)
        # Set of characters (as byte integers) that don't need to be encoded
        # in bodies.
        self.blit = list(range(ord(' '), ord('~') + 1))
        self.blit.append(ord('\t'))
        self.blit.remove(ord('='))
        # Set of characters (as byte integers) that do need to be encoded in
        # bodies.
        self.bnon = [c for c in range(256) if c not in self.blit]
        self.assertEqual(len(self.blit) + len(self.bnon), 256)

    def test_quopri_header_check(self):
        for c in self.hlit:
            self.assertFalse(quoprimime.header_check(c),
                             'Should not be header quopri encoded: %s' % chr(c))
        for c in self.hnon:
            self.assertTrue(quoprimime.header_check(c),
                            'Should be header quopri encoded: %s' % chr(c))

    def test_quopri_body_check(self):
        for c in self.blit:
            self.assertFalse(quoprimime.body_check(c),
                             'Should not be body quopri encoded: %s' % chr(c))
        for c in self.bnon:
            self.assertTrue(quoprimime.body_check(c),
                            'Should be body quopri encoded: %s' % chr(c))

    def test_header_quopri_len(self):
        eq = self.assertEqual
        eq(quoprimime.header_length(b'hello'), 5)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
           quoprimime.header_length(b'hello') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
           quoprimime.header_length(b'h@e@l@l@o@') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        for c in self.hlit:
            eq(quoprimime.header_length(bytes([c])), 1,
               'expected length 1 for %r' % chr(c))
        for c in self.hnon:
            # Space is special; it's encoded to _
            if c == ord(' '):
                continue
            eq(quoprimime.header_length(bytes([c])), 3,
               'expected length 3 for %r' % chr(c))
        eq(quoprimime.header_length(b' '), 1)

    def test_body_quopri_len(self):
        eq = self.assertEqual
        for c in self.blit:
            eq(quoprimime.body_length(bytes([c])), 1)
        for c in self.bnon:
            eq(quoprimime.body_length(bytes([c])), 3)

    def test_quote_unquote_idempotent(self):
        for x in range(256):
            c = chr(x)
            self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)

    def _test_header_encode(self, header, expected_encoded_header, charset=None):
        # Encode header (with header_encode's default charset when charset
        # is None) and compare against the expected encoded word.
        if charset is None:
            encoded_header = quoprimime.header_encode(header)
        else:
            encoded_header = quoprimime.header_encode(header, charset)
        self.assertEqual(encoded_header, expected_encoded_header)

    def test_header_encode_null(self):
        self._test_header_encode(b'', '')

    def test_header_encode_one_word(self):
        self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=')

    def test_header_encode_two_lines(self):
        self._test_header_encode(b'hello\nworld',
                                 '=?iso-8859-1?q?hello=0Aworld?=')

    def test_header_encode_non_ascii(self):
        self._test_header_encode(b'hello\xc7there',
                                 '=?iso-8859-1?q?hello=C7there?=')

    def test_header_encode_alt_charset(self):
        self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
                                 charset='iso-8859-2')

    def _test_header_decode(self, encoded_header, expected_decoded_header):
        decoded_header = quoprimime.header_decode(encoded_header)
        self.assertEqual(decoded_header, expected_decoded_header)

    def test_header_decode_null(self):
        self._test_header_decode('', '')

    def test_header_decode_one_word(self):
        self._test_header_decode('hello', 'hello')

    def test_header_decode_two_lines(self):
        self._test_header_decode('hello=0Aworld', 'hello\nworld')

    def test_header_decode_non_ascii(self):
        self._test_header_decode('hello=C7there', 'hello\xc7there')

    def test_header_decode_re_bug_18380(self):
        # Issue 18380: Call re.sub with a positional argument for flags in
        # the wrong position
        self.assertEqual(quoprimime.header_decode('=30' * 257), '0' * 257)

    def _test_decode(self, encoded, expected_decoded, eol=None):
        # Decode encoded (with decode's default eol when eol is None) and
        # compare against the expected text.
        if eol is None:
            decoded = quoprimime.decode(encoded)
        else:
            decoded = quoprimime.decode(encoded, eol=eol)
        self.assertEqual(decoded, expected_decoded)

    def test_decode_null_word(self):
        self._test_decode('', '')

    def test_decode_null_line_null_word(self):
        self._test_decode('\r\n', '\n')

    def test_decode_one_word(self):
        self._test_decode('hello', 'hello')

    def test_decode_one_word_eol(self):
        self._test_decode('hello', 'hello', eol='X')

    def test_decode_one_line(self):
        self._test_decode('hello\r\n', 'hello\n')

    def test_decode_one_line_lf(self):
        self._test_decode('hello\n', 'hello\n')

    def test_decode_one_line_cr(self):
        self._test_decode('hello\r', 'hello\n')

    def test_decode_one_line_nl(self):
        self._test_decode('hello\n', 'helloX', eol='X')

    def test_decode_one_line_crnl(self):
        self._test_decode('hello\r\n', 'helloX', eol='X')

    def test_decode_one_line_one_word(self):
        self._test_decode('hello\r\nworld', 'hello\nworld')

    def test_decode_one_line_one_word_eol(self):
        self._test_decode('hello\r\nworld', 'helloXworld', eol='X')

    def test_decode_two_lines(self):
        self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n')

    def test_decode_two_lines_eol(self):
        self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X')

    def test_decode_one_long_line(self):
        self._test_decode('Spam' * 250, 'Spam' * 250)

    def test_decode_one_space(self):
        self._test_decode(' ', '')

    def test_decode_multiple_spaces(self):
        self._test_decode(' ' * 5, '')

    def test_decode_one_line_trailing_spaces(self):
        self._test_decode('hello \r\n', 'hello\n')

    def test_decode_two_lines_trailing_spaces(self):
        self._test_decode('hello \r\nworld \r\n', 'hello\nworld\n')

    def test_decode_quoted_word(self):
        self._test_decode('=22quoted=20words=22', '"quoted words"')

    def test_decode_uppercase_quoting(self):
        self._test_decode('ab=CD=EF', 'ab\xcd\xef')

    def test_decode_lowercase_quoting(self):
        self._test_decode('ab=cd=ef', 'ab\xcd\xef')

    def test_decode_soft_line_break(self):
        self._test_decode('soft line=\r\nbreak', 'soft linebreak')

    def test_decode_false_quoting(self):
        self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2')

    def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
        # Encode body, forwarding only the keyword arguments that were
        # given explicitly, compare with the expected text and, when the
        # result can be split back into lines, check the line-length limit.
        kwargs = {}
        if maxlinelen is None:
            # Use body_encode's default.
            maxlinelen = 76
        else:
            kwargs['maxlinelen'] = maxlinelen
        if eol is None:
            # Use body_encode's default.
            eol = '\n'
        else:
            kwargs['eol'] = eol
        encoded_body = quoprimime.body_encode(body, **kwargs)
        self.assertEqual(encoded_body, expected_encoded_body)
        if eol == '\n' or eol == '\r\n':
            # We know how to split the result back into lines, so maxlinelen
            # can be checked.
            for line in encoded_body.splitlines():
                self.assertLessEqual(len(line), maxlinelen)

    def test_encode_null(self):
        self._test_encode('', '')

    def test_encode_null_lines(self):
        self._test_encode('\n\n', '\n\n')

    def test_encode_one_line(self):
        self._test_encode('hello\n', 'hello\n')

    def test_encode_one_line_crlf(self):
        self._test_encode('hello\r\n', 'hello\n')

    def test_encode_one_line_eol(self):
        self._test_encode('hello\n', 'hello\r\n', eol='\r\n')

    def test_encode_one_space(self):
        self._test_encode(' ', '=20')

    def test_encode_one_line_one_space(self):
        self._test_encode(' \n', '=20\n')

    # XXX: body_encode() expect strings, but uses ord(char) from these strings
    # to index into a 256-entry list.  For code points above 255, this will
    # fail.  Should there be a check for 8-bit only ord() values in body, or
    # at least a comment about the expected input?

    def test_encode_two_lines_one_space(self):
        self._test_encode(' \n \n', '=20\n=20\n')

    def test_encode_one_word_trailing_spaces(self):
        # Only the last blank of a trailing run is quoted, so two trailing
        # spaces are needed to get a literal space in front of the '=20'.
        self._test_encode('hello  ', 'hello =20')

    def test_encode_one_line_trailing_spaces(self):
        # As above: the first trailing space stays literal, the second is
        # quoted.
        self._test_encode('hello  \n', 'hello =20\n')

    def test_encode_one_word_trailing_tab(self):
        self._test_encode('hello \t', 'hello =09')

    def test_encode_one_line_trailing_tab(self):
        self._test_encode('hello \t\n', 'hello =09\n')

    def test_encode_trailing_space_before_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6)

    def test_encode_trailing_space_at_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5)

    def test_encode_trailing_space_beyond_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4)

    def test_encode_whitespace_lines(self):
        self._test_encode(' \n' * 5, '=20\n' * 5)

    def test_encode_quoted_equals(self):
        self._test_encode('a = b', 'a =3D b')

    def test_encode_one_long_string(self):
        self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25)

    def test_encode_one_long_line(self):
        self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')

    def test_encode_one_very_long_line(self):
        self._test_encode('x' * 200 + '\n',
                          2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n')

    def test_encode_shortest_maxlinelen(self):
        self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4)

    def test_encode_maxlinelen_too_small(self):
        self.assertRaises(ValueError, self._test_encode, '', '', maxlinelen=3)

    def test_encode(self):
        eq = self.assertEqual
        eq(quoprimime.body_encode(''), '')
        eq(quoprimime.body_encode('hello'), 'hello')
        # Test the binary flag
        eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
        # Test the maxlinelen arg
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
x xxxx xxxx xxxx xxxx=20""")
        # Test the eol argument
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
x xxxx xxxx xxxx xxxx=20""")
        eq(quoprimime.body_encode("""\
one line

two line"""), """\
one line

two line""")
4268
4269
Ezio Melottib3aedd42010-11-20 19:04:17 +00004270
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004271# Test the Charset class
class TestCharset(unittest.TestCase):
    # Tests for email.charset.Charset.

    def tearDown(self):
        # Drop the 'fake' charset registered by test_body_encode, if any.
        from email import charset as CharsetModule
        CharsetModule.CHARSETS.pop('fake', None)

    def test_codec_encodeable(self):
        # Make sure us-ascii = no Unicode conversion
        ascii_charset = Charset('us-ascii')
        self.assertEqual(ascii_charset.header_encode('Hello World!'),
                         'Hello World!')
        # Test 8-bit idempotency with us-ascii
        s = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
        self.assertRaises(UnicodeError, ascii_charset.header_encode, s)
        utf8_charset = Charset('utf-8')
        self.assertEqual(utf8_charset.header_encode(s),
                         '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')

    def test_body_encode(self):
        # Try a charset with QP body encoding
        qp_charset = Charset('iso-8859-1')
        self.assertEqual('hello w=F6rld',
                         qp_charset.body_encode('hello w\xf6rld'))
        # Try a charset with Base64 body encoding
        b64_charset = Charset('utf-8')
        self.assertEqual('aGVsbG8gd29ybGQ=\n',
                         b64_charset.body_encode(b'hello world'))
        # Try a charset with None body encoding
        plain_charset = Charset('us-ascii')
        self.assertEqual('hello world',
                         plain_charset.body_encode('hello world'))
        # Try the convert argument, where input codec != output codec
        Charset('euc-jp')
        # With apologies to Tokio Kikuchi ;)
        # XXX FIXME
##         try:
##             eq('\x1b$B5FCO;~IW\x1b(B',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
##             eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
##         except LookupError:
##             # We probably don't have the Japanese codecs installed
##             pass
        # Testing SF bug #625509, which we have to fake, since there are no
        # built-in encodings where the header encoding is QP but the body
        # encoding is not.
        from email import charset as CharsetModule
        CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8')
        fake_charset = Charset('fake')
        self.assertEqual('hello world',
                         fake_charset.body_encode('hello world'))

    def test_unicode_charset_name(self):
        us_ascii = Charset('us-ascii')
        self.assertEqual(str(us_ascii), 'us-ascii')
        self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
4326
4327
Ezio Melottib3aedd42010-11-20 19:04:17 +00004328
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004329# Test multilingual MIME headers.
4330class TestHeader(TestEmailBase):
4331 def test_simple(self):
4332 eq = self.ndiffAssertEqual
4333 h = Header('Hello World!')
4334 eq(h.encode(), 'Hello World!')
4335 h.append(' Goodbye World!')
4336 eq(h.encode(), 'Hello World! Goodbye World!')
4337
def test_simple_surprise(self):
    # The "surprise": a chunk appended without a leading space is still
    # separated from the previous text by one space when encoded.
    check = self.ndiffAssertEqual
    header = Header('Hello World!')
    check(header.encode(), 'Hello World!')
    header.append('Goodbye World!')
    check(header.encode(), 'Hello World! Goodbye World!')
4344
4345 def test_header_needs_no_decoding(self):
4346 h = 'no decoding needed'
4347 self.assertEqual(decode_header(h), [(h, None)])
4348
4349 def test_long(self):
4350 h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
4351 maxlinelen=76)
4352 for l in h.encode(splitchars=' ').split('\n '):
Serhiy Storchaka328cf3c2013-11-16 12:56:23 +02004353 self.assertLessEqual(len(l), 76)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004354
def test_multilingual(self):
    # Build one header from three chunks in three charsets
    # (iso-8859-1, iso-8859-2, utf-8) and check its encoded form, what
    # decode_header() recovers from that form, its str() value, and that
    # make_header() rebuilds an equal Header.
    eq = self.ndiffAssertEqual
    g = Charset("iso-8859-1")
    cz = Charset("iso-8859-2")
    utf8 = Charset("utf-8")
    g_head = (b'Die Mieter treten hier ein werden mit einem '
              b'Foerderband komfortabel den Korridor entlang, '
              b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
              b'gegen die rotierenden Klingen bef\xf6rdert. ')
    cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
               b'd\xf9vtipu.. ')
    utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                 '\u3044\u307e\u3059\u3002')
    h = Header(g_head, g)
    h.append(cz_head, cz)
    h.append(utf8_head, utf8)
    enc = h.encode(maxlinelen=76)
    # Expected folding: the two iso-8859-* chunks appear as q encoded
    # words and the utf-8 chunk as b encoded words; every continuation
    # line starts with a single space.
    eq(enc, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
    # Decoding must give back one (bytes, charset) pair per original chunk.
    decoded = decode_header(enc)
    eq(len(decoded), 3)
    eq(decoded[0], (g_head, 'iso-8859-1'))
    eq(decoded[1], (cz_head, 'iso-8859-2'))
    eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
    # str() of the header is the full text; the expected value is spelled
    # as UTF-8 bytes and decoded for the comparison.
    ustr = str(h)
    eq(ustr,
       (b'Die Mieter treten hier ein werden mit einem Foerderband '
        b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
        b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
        b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
        b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
        b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
        b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
        b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
        b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
        b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
        b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
        b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
        b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
        b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
        b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
        b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
        ).decode('utf-8'))
    # Test make_header()
    newh = make_header(decode_header(enc))
    eq(newh, h)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004417
4418 def test_empty_header_encode(self):
4419 h = Header()
4420 self.assertEqual(h.encode(), '')
Barry Warsaw8b3d6592007-08-30 02:10:49 +00004421
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004422 def test_header_ctor_default_args(self):
4423 eq = self.ndiffAssertEqual
4424 h = Header()
4425 eq(h, '')
4426 h.append('foo', Charset('iso-8859-1'))
Guido van Rossum9604e662007-08-30 03:46:43 +00004427 eq(h, 'foo')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004428
4429 def test_explicit_maxlinelen(self):
4430 eq = self.ndiffAssertEqual
4431 hstr = ('A very long line that must get split to something other '
4432 'than at the 76th character boundary to test the non-default '
4433 'behavior')
4434 h = Header(hstr)
4435 eq(h.encode(), '''\
4436A very long line that must get split to something other than at the 76th
4437 character boundary to test the non-default behavior''')
4438 eq(str(h), hstr)
4439 h = Header(hstr, header_name='Subject')
4440 eq(h.encode(), '''\
4441A very long line that must get split to something other than at the
4442 76th character boundary to test the non-default behavior''')
4443 eq(str(h), hstr)
4444 h = Header(hstr, maxlinelen=1024, header_name='Subject')
4445 eq(h.encode(), hstr)
4446 eq(str(h), hstr)
4447
Guido van Rossum9604e662007-08-30 03:46:43 +00004448 def test_quopri_splittable(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004449 eq = self.ndiffAssertEqual
4450 h = Header(charset='iso-8859-1', maxlinelen=20)
Guido van Rossum9604e662007-08-30 03:46:43 +00004451 x = 'xxxx ' * 20
4452 h.append(x)
4453 s = h.encode()
4454 eq(s, """\
4455=?iso-8859-1?q?xxx?=
4456 =?iso-8859-1?q?x_?=
4457 =?iso-8859-1?q?xx?=
4458 =?iso-8859-1?q?xx?=
4459 =?iso-8859-1?q?_x?=
4460 =?iso-8859-1?q?xx?=
4461 =?iso-8859-1?q?x_?=
4462 =?iso-8859-1?q?xx?=
4463 =?iso-8859-1?q?xx?=
4464 =?iso-8859-1?q?_x?=
4465 =?iso-8859-1?q?xx?=
4466 =?iso-8859-1?q?x_?=
4467 =?iso-8859-1?q?xx?=
4468 =?iso-8859-1?q?xx?=
4469 =?iso-8859-1?q?_x?=
4470 =?iso-8859-1?q?xx?=
4471 =?iso-8859-1?q?x_?=
4472 =?iso-8859-1?q?xx?=
4473 =?iso-8859-1?q?xx?=
4474 =?iso-8859-1?q?_x?=
4475 =?iso-8859-1?q?xx?=
4476 =?iso-8859-1?q?x_?=
4477 =?iso-8859-1?q?xx?=
4478 =?iso-8859-1?q?xx?=
4479 =?iso-8859-1?q?_x?=
4480 =?iso-8859-1?q?xx?=
4481 =?iso-8859-1?q?x_?=
4482 =?iso-8859-1?q?xx?=
4483 =?iso-8859-1?q?xx?=
4484 =?iso-8859-1?q?_x?=
4485 =?iso-8859-1?q?xx?=
4486 =?iso-8859-1?q?x_?=
4487 =?iso-8859-1?q?xx?=
4488 =?iso-8859-1?q?xx?=
4489 =?iso-8859-1?q?_x?=
4490 =?iso-8859-1?q?xx?=
4491 =?iso-8859-1?q?x_?=
4492 =?iso-8859-1?q?xx?=
4493 =?iso-8859-1?q?xx?=
4494 =?iso-8859-1?q?_x?=
4495 =?iso-8859-1?q?xx?=
4496 =?iso-8859-1?q?x_?=
4497 =?iso-8859-1?q?xx?=
4498 =?iso-8859-1?q?xx?=
4499 =?iso-8859-1?q?_x?=
4500 =?iso-8859-1?q?xx?=
4501 =?iso-8859-1?q?x_?=
4502 =?iso-8859-1?q?xx?=
4503 =?iso-8859-1?q?xx?=
4504 =?iso-8859-1?q?_?=""")
4505 eq(x, str(make_header(decode_header(s))))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004506 h = Header(charset='iso-8859-1', maxlinelen=40)
4507 h.append('xxxx ' * 20)
Guido van Rossum9604e662007-08-30 03:46:43 +00004508 s = h.encode()
4509 eq(s, """\
4510=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
4511 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
4512 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
4513 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
4514 =?iso-8859-1?q?_xxxx_xxxx_?=""")
4515 eq(x, str(make_header(decode_header(s))))
4516
4517 def test_base64_splittable(self):
4518 eq = self.ndiffAssertEqual
4519 h = Header(charset='koi8-r', maxlinelen=20)
4520 x = 'xxxx ' * 20
4521 h.append(x)
4522 s = h.encode()
4523 eq(s, """\
4524=?koi8-r?b?eHh4?=
4525 =?koi8-r?b?eCB4?=
4526 =?koi8-r?b?eHh4?=
4527 =?koi8-r?b?IHh4?=
4528 =?koi8-r?b?eHgg?=
4529 =?koi8-r?b?eHh4?=
4530 =?koi8-r?b?eCB4?=
4531 =?koi8-r?b?eHh4?=
4532 =?koi8-r?b?IHh4?=
4533 =?koi8-r?b?eHgg?=
4534 =?koi8-r?b?eHh4?=
4535 =?koi8-r?b?eCB4?=
4536 =?koi8-r?b?eHh4?=
4537 =?koi8-r?b?IHh4?=
4538 =?koi8-r?b?eHgg?=
4539 =?koi8-r?b?eHh4?=
4540 =?koi8-r?b?eCB4?=
4541 =?koi8-r?b?eHh4?=
4542 =?koi8-r?b?IHh4?=
4543 =?koi8-r?b?eHgg?=
4544 =?koi8-r?b?eHh4?=
4545 =?koi8-r?b?eCB4?=
4546 =?koi8-r?b?eHh4?=
4547 =?koi8-r?b?IHh4?=
4548 =?koi8-r?b?eHgg?=
4549 =?koi8-r?b?eHh4?=
4550 =?koi8-r?b?eCB4?=
4551 =?koi8-r?b?eHh4?=
4552 =?koi8-r?b?IHh4?=
4553 =?koi8-r?b?eHgg?=
4554 =?koi8-r?b?eHh4?=
4555 =?koi8-r?b?eCB4?=
4556 =?koi8-r?b?eHh4?=
4557 =?koi8-r?b?IA==?=""")
4558 eq(x, str(make_header(decode_header(s))))
4559 h = Header(charset='koi8-r', maxlinelen=40)
4560 h.append(x)
4561 s = h.encode()
4562 eq(s, """\
4563=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
4564 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
4565 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
4566 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
4567 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
4568 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
4569 eq(x, str(make_header(decode_header(s))))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004570
4571 def test_us_ascii_header(self):
4572 eq = self.assertEqual
4573 s = 'hello'
4574 x = decode_header(s)
4575 eq(x, [('hello', None)])
4576 h = make_header(x)
4577 eq(s, h.encode())
4578
4579 def test_string_charset(self):
4580 eq = self.assertEqual
4581 h = Header()
4582 h.append('hello', 'iso-8859-1')
Guido van Rossum9604e662007-08-30 03:46:43 +00004583 eq(h, 'hello')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004584
##    def test_unicode_error(self):
##        raises = self.assertRaises
##        raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
##        h = Header()
##        raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')
4593
4594 def test_utf8_shortest(self):
4595 eq = self.assertEqual
4596 h = Header('p\xf6stal', 'utf-8')
4597 eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
4598 h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
4599 eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')
4600
4601 def test_bad_8bit_header(self):
4602 raises = self.assertRaises
4603 eq = self.assertEqual
4604 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4605 raises(UnicodeError, Header, x)
4606 h = Header()
4607 raises(UnicodeError, h.append, x)
4608 e = x.decode('utf-8', 'replace')
4609 eq(str(Header(x, errors='replace')), e)
4610 h.append(x, errors='replace')
4611 eq(str(h), e)
4612
R David Murray041015c2011-03-25 15:10:55 -04004613 def test_escaped_8bit_header(self):
4614 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
R David Murray6bdb1762011-06-18 12:30:55 -04004615 e = x.decode('ascii', 'surrogateescape')
4616 h = Header(e, charset=email.charset.UNKNOWN8BIT)
R David Murray041015c2011-03-25 15:10:55 -04004617 self.assertEqual(str(h),
4618 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
4619 self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])
4620
R David Murraye5e366c2011-06-18 12:57:28 -04004621 def test_header_handles_binary_unknown8bit(self):
4622 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4623 h = Header(x, charset=email.charset.UNKNOWN8BIT)
4624 self.assertEqual(str(h),
4625 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
4626 self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])
4627
4628 def test_make_header_handles_binary_unknown8bit(self):
4629 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4630 h = Header(x, charset=email.charset.UNKNOWN8BIT)
4631 h2 = email.header.make_header(email.header.decode_header(h))
4632 self.assertEqual(str(h2),
4633 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
4634 self.assertEqual(email.header.decode_header(h2), [(x, 'unknown-8bit')])
4635
R David Murray041015c2011-03-25 15:10:55 -04004636 def test_modify_returned_list_does_not_change_header(self):
4637 h = Header('test')
4638 chunks = email.header.decode_header(h)
4639 chunks.append(('ascii', 'test2'))
4640 self.assertEqual(str(h), 'test')
4641
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004642 def test_encoded_adjacent_nonencoded(self):
4643 eq = self.assertEqual
4644 h = Header()
4645 h.append('hello', 'iso-8859-1')
4646 h.append('world')
4647 s = h.encode()
4648 eq(s, '=?iso-8859-1?q?hello?= world')
4649 h = make_header(decode_header(s))
4650 eq(h.encode(), s)
4651
R David Murray07ea53c2012-06-02 17:56:49 -04004652 def test_whitespace_keeper(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004653 eq = self.assertEqual
4654 s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
4655 parts = decode_header(s)
R David Murray07ea53c2012-06-02 17:56:49 -04004656 eq(parts, [(b'Subject: ', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b' zz.', None)])
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004657 hdr = make_header(parts)
4658 eq(hdr.encode(),
4659 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')
4660
4661 def test_broken_base64_header(self):
4662 raises = self.assertRaises
R. David Murrayc4e69cc2010-08-03 22:14:10 +00004663 s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004664 raises(errors.HeaderParseError, decode_header, s)
4665
R. David Murray477efb32011-01-05 01:39:32 +00004666 def test_shift_jis_charset(self):
4667 h = Header('文', charset='shift_jis')
4668 self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')
4669
R David Murrayde912762011-03-16 18:26:23 -04004670 def test_flatten_header_with_no_value(self):
4671 # Issue 11401 (regression from email 4.x) Note that the space after
4672 # the header doesn't reflect the input, but this is also the way
4673 # email 4.x behaved. At some point it would be nice to fix that.
4674 msg = email.message_from_string("EmptyHeader:")
4675 self.assertEqual(str(msg), "EmptyHeader: \n\n")
4676
R David Murray01581ee2011-04-18 10:04:34 -04004677 def test_encode_preserves_leading_ws_on_value(self):
4678 msg = Message()
4679 msg['SomeHeader'] = ' value with leading ws'
4680 self.assertEqual(str(msg), "SomeHeader: value with leading ws\n\n")
4681
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004682
Ezio Melottib3aedd42010-11-20 19:04:17 +00004683
# Test RFC 2231 header parameters (en/de)coding
class TestRFC2231(TestEmailBase):
    # The "test_headerregistry...." comments above individual tests name the
    # corresponding test in test_headerregistry.

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes
    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes
    def test_get_param(self):
        # An RFC 2231 encoded parameter comes back as a
        # (charset, language, value) tuple; unquote=False keeps the quotes.
        eq = self.assertEqual
        msg = self._msgobj('msg_29.txt')
        eq(msg.get_param('title'),
           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        eq(msg.get_param('title', unquote=False),
           ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))

    def test_set_param(self):
        # set_param() with a charset (and optionally a language) must be
        # readable through get_param() and serialize as an RFC 2231
        # "title*=" parameter.
        # NOTE(review): the expected message text below has to match
        # msg_01.txt byte for byte; verify whitespace runs (e.g. in the
        # Received: date) against the fixture.
        eq = self.ndiffAssertEqual
        msg = Message()
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii')
        eq(msg.get_param('title'),
           ('us-ascii', '', 'This is even more ***fun*** isn\'t it!'))
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        eq(msg.get_param('title'),
           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        msg = self._msgobj('msg_01.txt')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    def test_set_param_requote(self):
        msg = Message()
        msg.set_param('title', 'foo')
        self.assertEqual(msg['content-type'], 'text/plain; title="foo"')
        msg.set_param('title', 'bar', requote=False)
        self.assertEqual(msg['content-type'], 'text/plain; title=bar')
        # tspecial is still quoted.
        msg.set_param('title', "(bar)bell", requote=False)
        self.assertEqual(msg['content-type'], 'text/plain; title="(bar)bell"')

    def test_del_param(self):
        # After setting two encoded parameters and deleting 'foo', only
        # 'title' may remain in the serialized Content-Type header.
        # NOTE(review): the expected message text below has to match
        # msg_01.txt byte for byte; verify whitespace runs (e.g. in the
        # Received: date) against the fixture.
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        msg.set_param('foo', 'bar', charset='us-ascii', language='en')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        msg.del_param('foo', header='Content-Type')
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_charset
    # I changed the charset name, though, because the one in the file isn't
    # a legal charset name.  Should add a test for an illegal charset.
    def test_rfc2231_get_content_charset(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_32.txt')
        eq(msg.get_content_charset(), 'us-ascii')

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_double_quotes
    def test_rfc2231_parse_rfc_quoting(self):
        # Unquoted encoded segments: the filename is reassembled and
        # decoded, and the message serializes back to the exact input.
        m = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*=''This%20is%20even%20more%20;
            \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        self.assertEqual(m, msg.as_string())

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes
    def test_rfc2231_parse_extra_quoting(self):
        # Same as test_rfc2231_parse_rfc_quoting, but with the encoded
        # segments wrapped in double quotes.
        m = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*="''This%20is%20even%20more%20";
            \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        self.assertEqual(m, msg.as_string())

    # test_headerregistry.TestContentTypeHeader.rfc2231_no_language_or_charset
    # but new test uses *0* because otherwise lang/charset is not valid.
    # test_headerregistry.TestContentTypeHeader.rfc2231_segmented_normal_values
    def test_rfc2231_no_language_or_charset(self):
        # Unencoded continuation segments are joined into a plain string,
        # not a (charset, language, value) tuple.
        m = '''\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm

'''
        msg = email.message_from_string(m)
        param = msg.get_param('NAME')
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(
            param,
            'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_charset
    def test_rfc2231_no_language_or_charset_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    # Duplicate of previous test?
    def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    # test_headerregistry.TestContentTypeHeader.rfc2231_partly_encoded,
    # but the test below is wrong (the first part should be decoded).
    def test_rfc2231_partly_encoded(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20***fun*** is it not.pdf')

    def test_rfc2231_partly_nonencoded(self):
        # No segment carries the trailing '*', so the percent escapes are
        # expected to come back undecoded.
        m = '''\
Content-Disposition: inline;
\tfilename*0="This%20is%20even%20more%20";
\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_boundary(self):
        m = '''\
Content-Type: multipart/alternative;
\tboundary*0*="''This%20is%20even%20more%20";
\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tboundary*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_boundary(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_charset(self):
        # This is a nonsensical charset value, but tests the code anyway
        m = '''\
Content-Type: text/plain;
\tcharset*0*="This%20is%20even%20more%20";
\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tcharset*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_content_charset(),
                         'this is even more ***fun*** is it not.pdf')

    # test_headerregistry.TestContentTypeHeader.rfc2231_unknown_charset_treated_as_ascii
    def test_rfc2231_bad_encoding_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_charset(self):
        m = """\
Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D

"""
        msg = email.message_from_string(m)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertEqual(msg.get_content_charset(), None)

    def test_rfc2231_bad_character_in_charset(self):
        m = """\
Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D

"""
        msg = email.message_from_string(m)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertEqual(msg.get_content_charset(), None)

    def test_rfc2231_bad_character_in_filename(self):
        # A byte that cannot be decoded in the declared charset is
        # expected to show up as U+FFFD in the filename.
        m = '''\
Content-Disposition: inline;
\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2*="is it not.pdf%E2"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf\ufffd')

    def test_rfc2231_unknown_encoding(self):
        m = """\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt

"""
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(), 'myfile.txt')

    def test_rfc2231_single_tick_in_filename_extended(self):
        # A lone apostrophe in an extended value must not be taken for a
        # charset/language delimiter: both come back as None.
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, None)
        eq(language, None)
        eq(s, "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes
    def test_rfc2231_single_tick_in_filename(self):
        m = """\
Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(m)
        param = msg.get_param('name')
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(param, "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_value_with_charset_and_lang
    def test_rfc2231_tick_attack_extended(self):
        # Only the first two apostrophes delimit charset and language;
        # the third one belongs to the value.
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_non_encoded_value
    def test_rfc2231_tick_attack(self):
        # Without the trailing '*' the value is not extended, so the
        # charset'lang' look-alike prefix stays part of the plain string.
        m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(m)
        param = msg.get_param('name')
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(param, "us-ascii'en-us'Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quotes_inside_quotes
    def test_rfc2231_no_extended_values(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo; name=\"Frank's Document\"

"""
        msg = email.message_from_string(m)
        eq(msg.get_param('name'), "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_then_unencoded_segments
    def test_rfc2231_encoded_then_unencoded_segments(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'My\";
\tname*1=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, 'My Document For You')

    # test_headerregistry.TestContentTypeHeader.rfc2231_unencoded_then_encoded_segments
    # test_headerregistry.TestContentTypeHeader.rfc2231_quoted_unencoded_then_encoded_segments
    def test_rfc2231_unencoded_then_encoded_segments(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'My\";
\tname*1*=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, 'My Document For You')
5052
5053
Ezio Melottib3aedd42010-11-20 19:04:17 +00005054
# Tests to ensure that signed parts of an email are completely preserved, as
# required by RFC1847 section 2.1.  Note that these are incomplete, because the
# email package does not currently always preserve the body.  See issue 1670765.
class TestSigned(TestEmailBase):

    def _msg_and_obj(self, filename):
        # Return (raw text of the test data file, Message parsed from it).
        with openfile(filename) as fp:
            original = fp.read()
        msg = email.message_from_string(original)
        return original, msg

    def _signed_parts_eq(self, original, result):
        # Extract the first mime part of each message and require the two
        # to be identical.  The pattern captures the text between the
        # first "--boundary" line and the next line starting with the
        # same "--boundary".
        repart = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)
        in_match = repart.search(original)
        out_match = repart.search(result)
        # Fail with a readable message rather than an AttributeError on
        # None if either text contains no delimited part.
        self.assertIsNotNone(in_match, 'no MIME part found in original')
        self.assertIsNotNone(out_match, 'no MIME part found in result')
        self.assertEqual(out_match.group(2), in_match.group(2))

    def test_long_headers_as_string(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)
5090
5091
Ezio Melottib3aedd42010-11-20 19:04:17 +00005092
# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()