blob: e11194b5b85246f1b0e641678bd80ec93c9af751 [file] [log] [blame]
Benjamin Peterson46a99002010-01-09 18:45:30 +00001# Copyright (C) 2001-2010 Python Software Foundation
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002# Contact: email-sig@python.org
3# email package unit tests
4
R. David Murray719a4492010-11-21 16:53:48 +00005import re
Guido van Rossum8b3febe2007-08-30 01:15:14 +00006import time
7import base64
Guido van Rossum8b3febe2007-08-30 01:15:14 +00008import unittest
R. David Murray96fd54e2010-10-08 15:55:28 +00009import textwrap
Guido van Rossum8b3febe2007-08-30 01:15:14 +000010
R. David Murray96fd54e2010-10-08 15:55:28 +000011from io import StringIO, BytesIO
Guido van Rossum8b3febe2007-08-30 01:15:14 +000012from itertools import chain
13
14import email
R David Murrayc27e5222012-05-25 15:01:48 -040015import email.policy
Guido van Rossum8b3febe2007-08-30 01:15:14 +000016
17from email.charset import Charset
18from email.header import Header, decode_header, make_header
19from email.parser import Parser, HeaderParser
R David Murray638d40b2012-08-24 11:14:13 -040020from email.generator import Generator, DecodedGenerator, BytesGenerator
Guido van Rossum8b3febe2007-08-30 01:15:14 +000021from email.message import Message
22from email.mime.application import MIMEApplication
23from email.mime.audio import MIMEAudio
24from email.mime.text import MIMEText
25from email.mime.image import MIMEImage
26from email.mime.base import MIMEBase
27from email.mime.message import MIMEMessage
28from email.mime.multipart import MIMEMultipart
29from email import utils
30from email import errors
31from email import encoders
32from email import iterators
33from email import base64mime
34from email import quoprimime
35
R David Murray965794e2013-03-07 18:16:47 -050036from test.support import unlink
R David Murraya256bac2011-03-31 12:20:23 -040037from test.test_email import openfile, TestEmailBase
Guido van Rossum8b3febe2007-08-30 01:15:14 +000038
R David Murray612528d2013-03-15 20:38:15 -040039# These imports are documented to work, but we are testing them using a
40# different path, so we import them here just to make sure they are importable.
41from email.parser import FeedParser, BytesFeedParser
42
# Small string constants shared by the tests in this module.
NL = '\n'            # line separator used when re-joining split text
EMPTYSTRING = ''
SPACE = ' '
46
47
# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    """Exercise the public API of email.message.Message.

    The tests cover header access, MIME parameter handling, charset and
    content-type helpers, payload decoding, and header generation.  Tests
    that call self._msgobj() or openfile() read fixture messages
    (msg_NN.txt) through helpers supplied by test.test_email.
    """

    def test_get_all(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        # A missing header yields the supplied failobj unchanged.
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        self.assertIsNone(msg.get_charset())
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        self.assertIsNone(msg.get_charset())
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_get_charsets(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_make_boundary(self):
        msg = MIMEMultipart('form-data')
        # Note that when the boundary gets created is an implementation
        # detail and might change.
        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
        # Trigger creation of boundary
        msg.as_string()
        self.assertEqual(msg.items()[0][1][:33],
                         'multipart/form-data; boundary="==')
        # XXX: there ought to be tests of the uniqueness of the boundary, too.

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        with openfile('msg_46.txt') as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, True, 0)
        gen.flatten(msg, False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_byte_message_rfc822_only(self):
        # Make sure new bytes header parser also passes this.
        with openfile('msg_46.txt', 'rb') as fp:
            msgdata = fp.read()
        parser = email.parser.BytesHeaderParser()
        msg = parser.parsebytes(msgdata)
        out = BytesIO()
        gen = email.generator.BytesGenerator(out)
        gen.flatten(msg)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        self.assertIsNone(msg.get_payload(decode=True))
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_get_payload_n_raises_on_non_multipart(self):
        msg = Message()
        self.assertRaises(TypeError, msg.get_payload, 1)

    def test_decoded_generator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        self.assertIn('from', msg)
        self.assertIn('From', msg)
        self.assertIn('FROM', msg)
        self.assertIn('to', msg)
        self.assertIn('To', msg)
        self.assertIn('TO', msg)

    def test_as_string(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        eq(text, str(msg))
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        # With unixfrom=True the first line is the envelope "From " line;
        # everything after it must match the original text.
        self.assertTrue(lines[0].startswith('From '))
        eq(text, NL.join(lines[1:]))

    # test_headerregistry.TestContentTypeHeader.bad_params
    def test_bad_param(self):
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_filename())

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_boundary())

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        # No Content-Type header is present, so the default lookup is None.
        self.assertIsNone(msg.get_params())
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    # test_headerregistry.TestContentTypeHeader.spaces_around_param_equals
    def test_get_param_liberal(self):
        msg = Message()
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        self.assertIsNone(msg.get_param('quuz', header='x-header'))
        self.assertIsNone(msg.get_param('quuz'))
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    # test_headerregistry.TestContentTypeHeader.spaces_around_semis
    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    # test_headerregistry.TestContentTypeHeader.semis_inside_quotes
    def test_get_param_with_semis_in_quotes(self):
        # The semicolons inside the quoted value must not be treated as
        # parameter separators.
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    # test_headerregistry.TestContentTypeHeader.quotes_inside_rfc2231_value
    def test_get_param_with_quotes(self):
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        msg = email.message_from_string('Header: exists')
        self.assertIn('header', msg)
        self.assertIn('Header', msg)
        self.assertIn('HEADER', msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        # Re-adding the parameter appends it after the remaining ones.
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_del_param_on_nonexistent_header(self):
        msg = Message()
        # Deleting param on empty msg should not raise exception.
        msg.del_param('filename', 'content-disposition')

    def test_del_nonexistent_param(self):
        msg = Message()
        msg.add_header('Content-Type', 'text/plain', charset='utf-8')
        existing_header = msg['Content-Type']
        msg.del_param('foobar', header='Content-Type')
        self.assertEqual(msg['Content-Type'], existing_header)

    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        # With a duplicated header only the first occurrence is replaced.
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    # test_defect_handling:test_invalid_chars_in_base64_payload
    def test_broken_base64_payload(self):
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         (b'\x03\x00\xe9\xd0\xfe\xff\xff.\x8b\xc0'
                          b'\xa1\x00p\xf6\xbf\xe9\x0f'))
        self.assertIsInstance(msg.defects[0],
                              errors.InvalidBase64CharactersDefect)

    def test_broken_unicode_payload(self):
        # This test improves coverage but is not a compliance test.
        # The behavior in this situation is currently undefined by the API.
        x = 'this is a br\xf6ken thing to do'
        msg = Message()
        msg['content-type'] = 'text/plain'
        msg['content-transfer-encoding'] = '8bit'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    def test_questionable_bytes_payload(self):
        # This test improves coverage but is not a compliance test,
        # since it involves poking inside the black box.
        x = 'this is a quéstionable thing to do'.encode('utf-8')
        msg = Message()
        msg['content-type'] = 'text/plain; charset="utf-8"'
        msg['content-transfer-encoding'] = '8bit'
        msg._payload = x
        self.assertEqual(msg.get_payload(decode=True), x)

    # Issue 1078919
    def test_ascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    def test_noascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer.ppt")
        self.assertEqual(
            'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
            msg['Content-Disposition'])

    def test_nonascii_add_header_via_triple(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fußballer.ppt'))
        self.assertEqual(
            'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
            msg['Content-Disposition'])

    def test_ascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="windows [filename].ppt")
        self.assertEqual(
            'attachment; filename="windows [filename].ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer [filename].ppt")
        self.assertEqual(
            "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
            msg['Content-Disposition'])

    def test_binary_quopri_payload(self):
        for charset in ('latin-1', 'ascii'):
            msg = Message()
            msg['content-type'] = 'text/plain; charset=%s' % charset
            msg['content-transfer-encoding'] = 'quoted-printable'
            msg.set_payload(b'foo=e6=96=87bar')
            self.assertEqual(
                msg.get_payload(decode=True),
                b'foo\xe6\x96\x87bar',
                'get_payload returns wrong result with charset %s.' % charset)

    def test_binary_base64_payload(self):
        for charset in ('latin-1', 'ascii'):
            msg = Message()
            msg['content-type'] = 'text/plain; charset=%s' % charset
            msg['content-transfer-encoding'] = 'base64'
            msg.set_payload(b'Zm9v5paHYmFy')
            self.assertEqual(
                msg.get_payload(decode=True),
                b'foo\xe6\x96\x87bar',
                'get_payload returns wrong result with charset %s.' % charset)

    def test_binary_uuencode_payload(self):
        for charset in ('latin-1', 'ascii'):
            for encoding in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
                msg = Message()
                msg['content-type'] = 'text/plain; charset=%s' % charset
                msg['content-transfer-encoding'] = encoding
                msg.set_payload(b"begin 666 -\n)9F]OYI:'8F%R\n \nend\n")
                # Build the failure message from a single string; wrapping
                # the pieces in str((a, b)) would render a tuple repr.
                self.assertEqual(
                    msg.get_payload(decode=True),
                    b'foo\xe6\x96\x87bar',
                    ('get_payload returns wrong result '
                     'with charset {0} and encoding {1}.').format(
                         charset, encoding))

    def test_add_header_with_name_only_param(self):
        msg = Message()
        # A None value emits a bare parameter name; the underscore in the
        # keyword becomes a dash.
        msg.add_header('Content-Disposition', 'inline', foo_bar=None)
        self.assertEqual("inline; foo-bar", msg['Content-Disposition'])

    def test_add_header_with_no_value(self):
        msg = Message()
        msg.add_header('X-Status', None)
        self.assertEqual('', msg['X-Status'])

    # Issue 5871: reject an attempt to embed a header inside a header value
    # (header injection attack).
    def test_embeded_header_via_Header_rejected(self):
        msg = Message()
        msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_embeded_header_via_string_rejected(self):
        msg = Message()
        msg['Dummy'] = 'dummy\nX-Injected-Header: test'
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_unicode_header_defaults_to_utf8_encoding(self):
        # Issue 14291
        m = MIMEText('abc\n')
        m['Subject'] = 'É test'
        self.assertEqual(str(m), textwrap.dedent("""\
            Content-Type: text/plain; charset="us-ascii"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 7bit
            Subject: =?utf-8?q?=C3=89_test?=

            abc
            """))

    def test_unicode_body_defaults_to_utf8_encoding(self):
        # Issue 14291
        m = MIMEText('É testabc\n')
        self.assertEqual(str(m), textwrap.dedent("""\
            Content-Type: text/plain; charset="utf-8"
            MIME-Version: 1.0
            Content-Transfer-Encoding: base64

            w4kgdGVzdGFiYwo=
            """))
677
678
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000679# Test the email.encoders module
680class TestEncoders(unittest.TestCase):
R David Murray6d94bd42011-03-16 15:52:22 -0400681
682 def test_EncodersEncode_base64(self):
683 with openfile('PyBanner048.gif', 'rb') as fp:
684 bindata = fp.read()
685 mimed = email.mime.image.MIMEImage(bindata)
686 base64ed = mimed.get_payload()
687 # the transfer-encoded body lines should all be <=76 characters
688 lines = base64ed.split('\n')
689 self.assertLessEqual(max([ len(x) for x in lines ]), 76)
690
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000691 def test_encode_empty_payload(self):
692 eq = self.assertEqual
693 msg = Message()
694 msg.set_charset('us-ascii')
695 eq(msg['content-transfer-encoding'], '7bit')
696
697 def test_default_cte(self):
698 eq = self.assertEqual
Ezio Melottic303c122010-04-22 11:57:12 +0000699 # 7bit data and the default us-ascii _charset
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000700 msg = MIMEText('hello world')
701 eq(msg['content-transfer-encoding'], '7bit')
Ezio Melottic303c122010-04-22 11:57:12 +0000702 # Similar, but with 8bit data
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000703 msg = MIMEText('hello \xf8 world')
R David Murray8680bcc2012-03-22 22:17:51 -0400704 eq(msg['content-transfer-encoding'], 'base64')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000705 # And now with a different charset
706 msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
707 eq(msg['content-transfer-encoding'], 'quoted-printable')
708
R. David Murraye85200d2010-05-06 01:41:14 +0000709 def test_encode7or8bit(self):
710 # Make sure a charset whose input character set is 8bit but
711 # whose output character set is 7bit gets a transfer-encoding
712 # of 7bit.
713 eq = self.assertEqual
R. David Murray850fc852010-06-03 01:58:28 +0000714 msg = MIMEText('文', _charset='euc-jp')
R. David Murraye85200d2010-05-06 01:41:14 +0000715 eq(msg['content-transfer-encoding'], '7bit')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000716
R David Murrayf581b372013-02-05 10:49:49 -0500717 def test_qp_encode_latin1(self):
718 msg = MIMEText('\xe1\xf6\n', 'text', 'ISO-8859-1')
719 self.assertEqual(str(msg), textwrap.dedent("""\
720 MIME-Version: 1.0
721 Content-Type: text/text; charset="iso-8859-1"
722 Content-Transfer-Encoding: quoted-printable
723
724 =E1=F6
725 """))
726
727 def test_qp_encode_non_latin1(self):
728 # Issue 16948
729 msg = MIMEText('\u017c\n', 'text', 'ISO-8859-2')
730 self.assertEqual(str(msg), textwrap.dedent("""\
731 MIME-Version: 1.0
732 Content-Type: text/text; charset="iso-8859-2"
733 Content-Transfer-Encoding: quoted-printable
734
735 =BF
736 """))
737
Ezio Melottib3aedd42010-11-20 19:04:17 +0000738
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000739# Test long header wrapping
class TestLongHeaders(TestEmailBase):
    # Checks how Header.encode(), Generator.flatten() and Message.as_string()
    # fold (wrap) header values that are longer than the maximum line length.
    #
    # NOTE: every expected value below is compared character for character,
    # so leading spaces/tabs on continuation lines, runs of spaces and
    # trailing whitespace inside the string literals are all significant.

    # Always show the complete diff when an assertion fails.
    maxDiff = None

    def test_split_long_continuation(self):
        # A parsed header whose continuation line is already too long and
        # has no split point is written back out unchanged.
        eq = self.ndiffAssertEqual
        msg = email.message_from_string("""\
Subject: bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text

test
""")
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), """\
Subject: bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text

test
""")

    def test_another_long_almost_unsplittable_header(self):
        # Same value as above, but encoded directly through Header, first
        # with tab continuation lines and then with space continuation lines.
        eq = self.ndiffAssertEqual
        hstr = """\
bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text"""
        h = Header(hstr, continuation_ws='\t')
        eq(h.encode(), """\
bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text""")
        h = Header(hstr.replace('\t', ' '))
        eq(h.encode(), """\
bug demonstration
 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
 more text""")

    def test_long_nonstring(self):
        # One header built from three chunks in three different charsets
        # (two given as bytes, one as str) is folded into RFC 2047 encoded
        # words, both via the Generator and via Header.encode().
        eq = self.ndiffAssertEqual
        g = Charset("iso-8859-1")
        cz = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
                  b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
                  b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
                  b'bef\xf6rdert. ')
        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        h = Header(g_head, g, header_name='Subject')
        h.append(cz_head, cz)
        h.append(utf8_head, utf8)
        msg = Message()
        msg['Subject'] = h
        sfp = StringIO()
        # Note: 'g' is rebound here from the iso-8859-1 Charset to the
        # Generator; the Charset is no longer needed at this point.
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), """\
Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
 =?utf-8?b?44CC?=

""")
        eq(h.encode(maxlinelen=76), """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")

    def test_long_header_encode(self):
        # The value is folded after a ';' that is followed by whitespace.
        eq = self.ndiffAssertEqual
        h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
                   'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
                   header_name='X-Foobar-Spoink-Defrobnit')
        eq(h.encode(), '''\
wasnipoop; giraffes="very-long-necked-animals";
 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')

    def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
        # continuation_ws='\t' does not replace the space that is already
        # present at the fold point of the value.
        eq = self.ndiffAssertEqual
        h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
                   'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
                   header_name='X-Foobar-Spoink-Defrobnit',
                   continuation_ws='\t')
        eq(h.encode(), '''\
wasnipoop; giraffes="very-long-necked-animals";
 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')

    def test_long_header_encode_with_tab_continuation(self):
        # Here the value itself contains a tab at the fold point, so the
        # continuation line starts with that tab.
        eq = self.ndiffAssertEqual
        h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
                   'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
                   header_name='X-Foobar-Spoink-Defrobnit',
                   continuation_ws='\t')
        eq(h.encode(), '''\
wasnipoop; giraffes="very-long-necked-animals";
\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')

    def test_header_encode_with_different_output_charset(self):
        # A header given in euc-jp is emitted as iso-2022-jp.
        h = Header('文', 'euc-jp')
        self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=")

    def test_long_header_encode_with_different_output_charset(self):
        # As above, but long enough to need two encoded words.
        h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4'
            b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4'
            b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4'
            b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp')
        res = """\
=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?=
 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?="""
        self.assertEqual(h.encode(), res)

    def test_header_splitter(self):
        eq = self.ndiffAssertEqual
        msg = MIMEText('')
        # It'd be great if we could use add_header() here, but that doesn't
        # guarantee an order of the parameters.
        msg['X-Foobar-Spoink-Defrobnit'] = (
            'wasnipoop; giraffes="very-long-necked-animals"; '
            'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), '''\
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"

''')

    def test_no_semis_header_splitter(self):
        # With no semicolons available, the value is folded at whitespace.
        eq = self.ndiffAssertEqual
        msg = Message()
        msg['From'] = 'test@dom.ain'
        msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
        msg.set_payload('Test')
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), """\
From: test@dom.ain
References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>

Test""")

    def test_last_split_chunk_does_not_fit(self):
        # The overlong final chunk is placed on a line of its own.
        eq = self.ndiffAssertEqual
        h = Header('Subject: the first part of this is short, but_the_second'
            '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
            '_all_by_itself')
        eq(h.encode(), """\
Subject: the first part of this is short,
 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")

    def test_splittable_leading_char_followed_by_overlong_unsplitable(self):
        eq = self.ndiffAssertEqual
        h = Header(', but_the_second'
            '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
            '_all_by_itself')
        eq(h.encode(), """\
,
 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")

    def test_multiple_splittable_leading_char_followed_by_overlong_unsplitable(self):
        eq = self.ndiffAssertEqual
        h = Header(', , but_the_second'
            '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
            '_all_by_itself')
        eq(h.encode(), """\
, ,
 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")

    def test_trailing_splitable_on_overlong_unsplitable(self):
        # A trailing ';' with nothing after it does not cause a fold.
        eq = self.ndiffAssertEqual
        h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
            'be_on_a_line_all_by_itself;')
        eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_should_"
            "be_on_a_line_all_by_itself;")

    def test_trailing_splitable_on_overlong_unsplitable_with_leading_splitable(self):
        # The trailing '; ' (including its space) stays on the overlong line.
        eq = self.ndiffAssertEqual
        h = Header('; '
            'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
            'be_on_a_line_all_by_itself; ')
        eq(h.encode(), """\
;
 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)

    def test_long_header_with_multiple_sequential_split_chars(self):
        # The doubled spaces in the value are deliberate (see the text of
        # the value itself) and must survive folding.
        eq = self.ndiffAssertEqual
        h = Header('This is a long line that has two whitespaces  in a row.  '
            'This used to cause truncation of the header when folded')
        eq(h.encode(), """\
This is a long line that has two whitespaces  in a row.  This used to cause
 truncation of the header when folded""")

    def test_splitter_split_on_punctuation_only_if_fws_with_header(self):
        # ';' and ',' that are not followed by whitespace are not fold
        # points, so the value is returned on one line.
        eq = self.ndiffAssertEqual
        h = Header('thisverylongheaderhas;semicolons;and,commas,but'
            'they;arenotlegal;fold,points')
        eq(h.encode(), "thisverylongheaderhas;semicolons;and,commas,butthey;"
                        "arenotlegal;fold,points")

    def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self):
        eq = self.ndiffAssertEqual
        h = Header('this is a test where we need to have more than one line '
            'before; our final line that is just too big to fit;; '
            'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
            'be_on_a_line_all_by_itself;')
        eq(h.encode(), """\
this is a test where we need to have more than one line before;
 our final line that is just too big to fit;;
 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""")

    def test_overlong_last_part_followed_by_split_point(self):
        # The trailing space is kept and no empty continuation line is added.
        eq = self.ndiffAssertEqual
        h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
            'be_on_a_line_all_by_itself ')
        eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_"
                        "should_be_on_a_line_all_by_itself ")

    def test_multiline_with_overlong_parts_separated_by_two_split_points(self):
        eq = self.ndiffAssertEqual
        h = Header('this_is_a__test_where_we_need_to_have_more_than_one_line_'
            'before_our_final_line_; ; '
            'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
            'be_on_a_line_all_by_itself; ')
        eq(h.encode(), """\
this_is_a__test_where_we_need_to_have_more_than_one_line_before_our_final_line_;
 ;
 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)

    def test_multiline_with_overlong_last_part_followed_by_split_point(self):
        eq = self.ndiffAssertEqual
        h = Header('this is a test where we need to have more than one line '
            'before our final line; ; '
            'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
            'be_on_a_line_all_by_itself; ')
        eq(h.encode(), """\
this is a test where we need to have more than one line before our final line;
 ;
 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)

    def test_long_header_with_whitespace_runs(self):
        # Each item ends in two spaces and items are joined with SPACE, so
        # the value contains runs of three spaces plus two trailing spaces
        # (written as \x20\x20 below so they are visible).
        eq = self.ndiffAssertEqual
        msg = Message()
        msg['From'] = 'test@dom.ain'
        msg['References'] = SPACE.join(['<foo@dom.ain>  '] * 10)
        msg.set_payload('Test')
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), """\
From: test@dom.ain
References: <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>
   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>
   <foo@dom.ain>   <foo@dom.ain>\x20\x20

Test""")

    def test_long_run_with_semi_header_splitter(self):
        eq = self.ndiffAssertEqual
        msg = Message()
        msg['From'] = 'test@dom.ain'
        msg['References'] = SPACE.join(['<foo@dom.ain>'] * 10) + '; abc'
        msg.set_payload('Test')
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), """\
From: test@dom.ain
References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
 <foo@dom.ain>; abc

Test""")

    def test_splitter_split_on_punctuation_only_if_fws(self):
        eq = self.ndiffAssertEqual
        msg = Message()
        msg['From'] = 'test@dom.ain'
        msg['References'] = ('thisverylongheaderhas;semicolons;and,commas,but'
            'they;arenotlegal;fold,points')
        msg.set_payload('Test')
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        # XXX the space after the header should not be there.
        eq(sfp.getvalue(), """\
From: test@dom.ain
References:\x20
 thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points

Test""")

    def test_no_split_long_header(self):
        eq = self.ndiffAssertEqual
        hstr = 'References: ' + 'x' * 80
        h = Header(hstr)
        # These come on two lines because Headers are really field value
        # classes and don't really know about their field names.
        eq(h.encode(), """\
References:
 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
        h = Header('x' * 80)
        eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')

    def test_splitting_multiple_long_lines(self):
        # Each of the three long input lines is folded independently; the
        # tab that starts the second and third input lines is preserved.
        eq = self.ndiffAssertEqual
        hstr = """\
from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
"""
        h = Header(hstr, continuation_ws='\t')
        eq(h.encode(), """\
from babylon.socal-raves.org (localhost [127.0.0.1]);
 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
 for <mailman-admin@babylon.socal-raves.org>;
 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
 for <mailman-admin@babylon.socal-raves.org>;
 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
 for <mailman-admin@babylon.socal-raves.org>;
 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")

    def test_splitting_first_line_only_is_long(self):
        # Only the first input line exceeds maxlinelen; the already-short
        # tab-continued lines pass through untouched.
        eq = self.ndiffAssertEqual
        hstr = """\
from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
\tid 17k4h5-00034i-00
\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
        h = Header(hstr, maxlinelen=78, header_name='Received',
                   continuation_ws='\t')
        eq(h.encode(), """\
from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
 helo=cthulhu.gerg.ca)
\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
\tid 17k4h5-00034i-00
\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")

    def test_long_8bit_header(self):
        # The same header is checked three ways: Header.encode(), a folded
        # as_string(), and an unfolded as_string(maxheaderlen=0).
        eq = self.ndiffAssertEqual
        msg = Message()
        h = Header('Britische Regierung gibt', 'iso-8859-1',
                    header_name='Subject')
        h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
        eq(h.encode(maxlinelen=76), """\
=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
        msg['Subject'] = h
        eq(msg.as_string(maxheaderlen=76), """\
Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
 =?iso-8859-1?q?hore-Windkraftprojekte?=

""")
        eq(msg.as_string(maxheaderlen=0), """\
Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=

""")

    def test_long_8bit_header_no_charset(self):
        # A non-ASCII value with no explicit charset comes out as utf-8
        # encoded words, whether assigned as a str or wrapped in a Header.
        eq = self.ndiffAssertEqual
        msg = Message()
        header_string = ('Britische Regierung gibt gr\xfcnes Licht '
                         'f\xfcr Offshore-Windkraftprojekte '
                         '<a-very-long-address@example.com>')
        msg['Reply-To'] = header_string
        eq(msg.as_string(maxheaderlen=78), """\
Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=

""")
        msg = Message()
        msg['Reply-To'] = Header(header_string,
                                 header_name='Reply-To')
        eq(msg.as_string(maxheaderlen=78), """\
Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=

""")

    def test_long_to_header(self):
        # The first ',' has no whitespace after it, so no fold happens there.
        eq = self.ndiffAssertEqual
        to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
              '<someone@eecs.umich.edu>, '
              '"Someone Test #B" <someone@umich.edu>, '
              '"Someone Test #C" <someone@eecs.umich.edu>, '
              '"Someone Test #D" <someone@eecs.umich.edu>')
        msg = Message()
        msg['To'] = to
        eq(msg.as_string(maxheaderlen=78), '''\
To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
 "Someone Test #B" <someone@umich.edu>,
 "Someone Test #C" <someone@eecs.umich.edu>,
 "Someone Test #D" <someone@eecs.umich.edu>

''')

    def test_long_line_after_append(self):
        eq = self.ndiffAssertEqual
        s = 'This is an example of string which has almost the limit of header length.'
        h = Header(s)
        h.append('Add another line.')
        eq(h.encode(maxlinelen=76), """\
This is an example of string which has almost the limit of header length.
 Add another line.""")

    def test_shorter_line_with_append(self):
        # The appended chunk fits, so it is joined with a single space.
        eq = self.ndiffAssertEqual
        s = 'This is a shorter line.'
        h = Header(s)
        h.append('Add another sentence. (Surprise?)')
        eq(h.encode(),
           'This is a shorter line. Add another sentence. (Surprise?)')

    def test_long_field_name(self):
        eq = self.ndiffAssertEqual
        fn = 'X-Very-Very-Very-Long-Header-Name'
        gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
              'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
              'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
              'bef\xf6rdert. ')
        h = Header(gs, 'iso-8859-1', header_name=fn)
        # BAW: this seems broken because the first line is too long
        eq(h.encode(maxlinelen=76), """\
=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")

    def test_long_received_header(self):
        # The same value is set once as a Header (tab continuation_ws) and
        # once as a plain str; both are expected to fold identically.
        h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
             'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
             'Wed, 05 Mar 2003 18:10:18 -0700')
        msg = Message()
        msg['Received-1'] = Header(h, continuation_ws='\t')
        msg['Received-2'] = h
        # This should be splitting on spaces not semicolons.
        self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
 Wed, 05 Mar 2003 18:10:18 -0700
Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
 Wed, 05 Mar 2003 18:10:18 -0700

""")

    def test_string_headerinst_eq(self):
        # A Header instance and the equivalent plain string must produce
        # the same folded output.
        h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
             'tu-muenchen.de> (David Bremner\'s message of '
             '"Thu, 6 Mar 2003 13:58:21 +0100")')
        msg = Message()
        msg['Received-1'] = Header(h, header_name='Received-1',
                                   continuation_ws='\t')
        msg['Received-2'] = h
        # XXX The space after the ':' should not be there.
        self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Received-1:\x20
 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
Received-2:\x20
 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")

""")

    def test_long_unbreakable_lines_with_continuation(self):
        # The same two-line unbreakable value is set as a str, as a Header,
        # and as a str with an extra leading space.
        eq = self.ndiffAssertEqual
        msg = Message()
        t = """\
iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
        msg['Face-1'] = t
        msg['Face-2'] = Header(t, header_name='Face-2')
        msg['Face-3'] = ' ' + t
        # XXX This splitting is all wrong.  The first value line should be
        # snug against the field name or the space after the header not there.
        eq(msg.as_string(maxheaderlen=78), """\
Face-1:\x20
 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Face-2:\x20
 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Face-3:\x20
 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp

""")

    def test_another_long_multiline_header(self):
        # Here the header comes from parsing a message string.
        eq = self.ndiffAssertEqual
        m = ('Received: from siimage.com '
             '([172.25.1.3]) by zima.siliconimage.com with '
             'Microsoft SMTPSVC(5.0.2195.4905); '
             'Wed, 16 Oct 2002 07:41:11 -0700')
        msg = email.message_from_string(m)
        eq(msg.as_string(maxheaderlen=78), '''\
Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with
 Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 2002 07:41:11 -0700

''')

    def test_long_lines_with_different_header(self):
        # The 'List' header is set twice (str, then Header), so two
        # identical folded headers are expected.  The run of spaces in
        # front of '<mailto:' is part of the value and is preserved.
        eq = self.ndiffAssertEqual
        h = ('List-Unsubscribe: '
             '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
             '        <mailto:spamassassin-talk-request@lists.sourceforge.net'
             '?subject=unsubscribe>')
        msg = Message()
        msg['List'] = h
        msg['List'] = Header(h, header_name='List')
        eq(msg.as_string(maxheaderlen=78), """\
List: List-Unsubscribe:
 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
        <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
List: List-Unsubscribe:
 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
        <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>

""")

    def test_long_rfc2047_header_with_embedded_fws(self):
        # The value already contains line breaks followed by whitespace.
        # The '\n' appended to both sides only makes a failure diff easier
        # to read; it does not change what is compared.
        h = Header(textwrap.dedent("""\
            We're going to pretend this header is in a non-ascii character set
            \tto see if line wrapping with encoded words and embedded
               folding white space works"""),
                   charset='utf-8',
                   header_name='Test')
        self.assertEqual(h.encode()+'\n', textwrap.dedent("""\
            =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
             =?utf-8?q?cter_set?=
             =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
             =?utf-8?q?_folding_white_space_works?=""")+'\n')
1309
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001310
Ezio Melottib3aedd42010-11-20 19:04:17 +00001311
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001312# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    # Checks the generators' mangle_from_ option, which prefixes body lines
    # that start with "From " with a '>' character.

    def setUp(self):
        # A one-header message whose body starts with a "From " line.
        self.msg = Message()
        self.msg['From'] = 'aaa@bbb.org'
        self.msg.set_payload("""\
From the desk of A.A.A.:
Blah blah blah
""")

    def test_mangled_from(self):
        # With mangling on, the body line gains a leading '>'; the From:
        # header itself is untouched.
        s = StringIO()
        g = Generator(s, mangle_from_=True)
        g.flatten(self.msg)
        self.assertEqual(s.getvalue(), """\
From: aaa@bbb.org

>From the desk of A.A.A.:
Blah blah blah
""")

    def test_dont_mangle_from(self):
        # With mangling off, the body is written verbatim.
        s = StringIO()
        g = Generator(s, mangle_from_=False)
        g.flatten(self.msg)
        self.assertEqual(s.getvalue(), """\
From: aaa@bbb.org

From the desk of A.A.A.:
Blah blah blah
""")

    def test_mangle_from_in_preamble_and_epilog(self):
        # Both the multipart preamble and the epilogue contain a line
        # starting with "From "; each must be mangled exactly once.
        s = StringIO()
        g = Generator(s, mangle_from_=True)
        msg = email.message_from_string(textwrap.dedent("""\
            From: foo@bar.com
            Mime-Version: 1.0
            Content-Type: multipart/mixed; boundary=XXX

            From somewhere unknown

            --XXX
            Content-Type: text/plain

            foo

            --XXX--

            From somewhere unknowable
            """))
        g.flatten(msg)
        # Count lazily instead of materialising a throwaway list of 1s.
        mangled = sum(1 for line in s.getvalue().split('\n')
                      if line.startswith('>From '))
        self.assertEqual(mangled, 2)

    def test_mangled_from_with_bad_bytes(self):
        # Mangling must also work through BytesGenerator when the body
        # holds non-ASCII (8bit, utf-8 encoded) bytes.
        source = textwrap.dedent("""\
            Content-Type: text/plain; charset="utf-8"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 8bit
            From: aaa@bbb.org

        """).encode('utf-8')
        msg = email.message_from_bytes(source + b'From R\xc3\xb6lli\n')
        b = BytesIO()
        g = BytesGenerator(b, mangle_from_=True)
        g.flatten(msg)
        self.assertEqual(b.getvalue(), source + b'>From R\xc3\xb6lli\n')
1380
Ezio Melottib3aedd42010-11-20 19:04:17 +00001381
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001382# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    # Exercises a MIMEAudio object built from the bytes of the
    # 'audiotest.au' test data file.

    def setUp(self):
        with openfile('audiotest.au', 'rb') as fp:
            self._audiodata = fp.read()
        self._au = MIMEAudio(self._audiodata)

    def test_guess_minor_type(self):
        # No subtype was passed in, so it has to be guessed from the data.
        self.assertEqual(self._au.get_content_type(), 'audio/basic')

    def test_encoding(self):
        # Decoding the base64 payload must give back the original bytes.
        payload = self._au.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._audiodata)

    def test_checkSetMinor(self):
        # An explicitly supplied subtype is used as given.
        au = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(au.get_content_type(), 'audio/fish')

    def test_add_header(self):
        eq = self.assertEqual
        self._au.add_header('Content-Disposition', 'attachment',
                            filename='audiotest.au')
        eq(self._au['content-disposition'],
           'attachment; filename="audiotest.au"')
        eq(self._au.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'audiotest.au')])
        eq(self._au.get_param('filename', header='content-disposition'),
           'audiotest.au')
        # A fresh list is a unique sentinel: only an identity check can
        # prove that get_param() returned the failobj itself.
        missing = []
        eq(self._au.get_param('attachment', header='content-disposition'), '')
        # assertIs reports both operands on failure, unlike
        # assertTrue(x is y), which can only say "False is not true".
        self.assertIs(self._au.get_param('foo', failobj=missing,
                                         header='content-disposition'),
                      missing)
        # Try some missing stuff
        self.assertIs(self._au.get_param('foobar', missing), missing)
        self.assertIs(self._au.get_param('attachment', missing,
                                         header='foobar'),
                      missing)
1420
1421
Ezio Melottib3aedd42010-11-20 19:04:17 +00001422
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001423# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    """Exercise MIMEImage built from the ``PyBanner048.gif`` sample file."""

    def setUp(self):
        # Keep the raw bytes around so tests can compare against them, and
        # build one MIMEImage part without naming a subtype.
        with openfile('PyBanner048.gif', 'rb') as fp:
            self._imgdata = fp.read()
        self._im = MIMEImage(self._imgdata)

    def test_guess_minor_type(self):
        # setUp passed no subtype; the part must still report image/gif.
        self.assertEqual(self._im.get_content_type(), 'image/gif')

    def test_encoding(self):
        # The payload is expected to be base64 text that decodes back to the
        # bytes the part was created from.
        payload = self._im.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._imgdata)

    def test_checkSetMinor(self):
        # An explicitly supplied subtype is used verbatim.
        im = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(im.get_content_type(), 'image/fish')

    def test_add_header(self):
        eq = self.assertEqual
        # assertIs reports both operands on failure, unlike
        # assertTrue(a is b) which only says "False is not true".
        same = self.assertIs
        self._im.add_header('Content-Disposition', 'attachment',
                            filename='dingusfish.gif')
        eq(self._im['content-disposition'],
           'attachment; filename="dingusfish.gif"')
        eq(self._im.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'dingusfish.gif')])
        eq(self._im.get_param('filename', header='content-disposition'),
           'dingusfish.gif')
        # A unique object used as the "not found" sentinel.
        missing = []
        eq(self._im.get_param('attachment', header='content-disposition'), '')
        same(self._im.get_param('foo', failobj=missing,
                                header='content-disposition'), missing)
        # Try some missing stuff
        same(self._im.get_param('foobar', missing), missing)
        same(self._im.get_param('attachment', missing,
                                header='foobar'), missing)
1461
1462
Ezio Melottib3aedd42010-11-20 19:04:17 +00001463
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001464# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    """Check MIMEApplication headers and bodies under each encoder."""

    def _roundtrip(self, msg):
        # Flatten msg to bytes with BytesGenerator and parse those bytes back
        # into a fresh message object.
        s = BytesIO()
        g = BytesGenerator(s)
        g.flatten(msg)
        wireform = s.getvalue()
        return email.message_from_bytes(wireform)

    def test_headers(self):
        eq = self.assertEqual
        msg = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        eq(msg.get_content_type(), 'application/octet-stream')
        eq(msg['content-transfer-encoding'], 'base64')

    def test_body(self):
        eq = self.assertEqual
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata)
        # whitespace in the cte encoded block is RFC-irrelevant.
        eq(msg.get_payload().strip(), '+vv8/f7/')
        eq(msg.get_payload(decode=True), bytesdata)

    def test_binary_body_with_encode_7or8bit(self):
        # Issue 17171.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_7or8bit)
        # Treated as a string, this will be invalid code points.
        self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        self.assertEqual(msg['Content-Transfer-Encoding'], '8bit')
        msg2 = self._roundtrip(msg)
        # The original still reads the same after being flattened ...
        self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
        # ... and so does the reparsed copy.
        self.assertEqual(msg2.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
        self.assertEqual(msg2['Content-Transfer-Encoding'], '8bit')

    def test_binary_body_with_encode_noop(self):
        # Issue 16564: This does not produce an RFC valid message, since to be
        # valid it should have a CTE of binary.  But the below works in
        # Python2, and is documented as working this way.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_noop)
        # Treated as a string, this will be invalid code points.
        self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        msg2 = self._roundtrip(msg)
        # The original still reads the same after being flattened ...
        self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
        # ... and so does the reparsed copy.
        self.assertEqual(msg2.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)

    def test_binary_body_with_encode_quopri(self):
        # Issue 14360.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff '
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_quopri)
        self.assertEqual(msg.get_payload(), '=FA=FB=FC=FD=FE=FF=20')
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        self.assertEqual(msg['Content-Transfer-Encoding'], 'quoted-printable')
        msg2 = self._roundtrip(msg)
        # The original still reads the same after being flattened ...
        self.assertEqual(msg.get_payload(), '=FA=FB=FC=FD=FE=FF=20')
        # ... and so does the reparsed copy.
        self.assertEqual(msg2.get_payload(), '=FA=FB=FC=FD=FE=FF=20')
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
        self.assertEqual(msg2['Content-Transfer-Encoding'], 'quoted-printable')

    def test_binary_body_with_encode_base64(self):
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_base64)
        self.assertEqual(msg.get_payload(), '+vv8/f7/\n')
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        msg2 = self._roundtrip(msg)
        # The original still reads the same after being flattened ...
        self.assertEqual(msg.get_payload(), '+vv8/f7/\n')
        # ... and so does the reparsed copy.
        self.assertEqual(msg2.get_payload(), '+vv8/f7/\n')
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
1542
Ezio Melottib3aedd42010-11-20 19:04:17 +00001543
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001544# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    """Check content type, charset and payload handling of MIMEText."""

    def setUp(self):
        self._msg = MIMEText('hello there')

    def test_types(self):
        eq = self.assertEqual
        eq(self._msg.get_content_type(), 'text/plain')
        eq(self._msg.get_param('charset'), 'us-ascii')
        # A unique object used as the "not found" sentinel; assertIs shows
        # what was actually returned if the lookup unexpectedly succeeds.
        missing = []
        self.assertIs(self._msg.get_param('foobar', missing), missing)
        self.assertIs(
            self._msg.get_param('charset', missing, header='foobar'),
            missing)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there')
        eq(msg.get_charset(), 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # assertIn prints the flattened message when the text is absent.
        self.assertIn('hello there', msg.as_string())

    def test_utf8_input(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        eq = self.assertEqual
        msg = MIMEText(teststr, _charset='utf-8')
        eq(msg.get_charset().output_charset, 'utf-8')
        eq(msg['content-type'], 'text/plain; charset="utf-8"')
        eq(msg.get_payload(decode=True), teststr.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
        "will fix in email6")
    def test_utf8_input_no_charset(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
1595
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001596
Ezio Melottib3aedd42010-11-20 19:04:17 +00001597
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001598# Test complicated multipart/* messages
class TestMultipart(TestEmailBase):
    """Build, flatten and parse multipart/* messages."""

    def setUp(self):
        # Assemble a two-part multipart/mixed message (text + GIF) and keep
        # references to the container and both parts for the tests.
        with openfile('PyBanner048.gif', 'rb') as fp:
            data = fp.read()
        container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
        image = MIMEImage(data, name='dingusfish.gif')
        image.add_header('content-disposition', 'attachment',
                         filename='dingusfish.gif')
        intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
        container.attach(intro)
        container.attach(image)
        container['From'] = 'Barry <barry@digicool.com>'
        container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
        container['Subject'] = 'Here is your dingus fish'

        now = 987809702.54848599
        # Let formatdate() render the local-time Date header.  The previous
        # hand-rolled '%s%04d' % (sign, tzsecs / 36) produced "+-NNN" for
        # zones east of UTC and wrong minutes for offsets that are not a
        # whole number of hours.
        container['Date'] = utils.formatdate(now, localtime=True)
        self._msg = container
        self._im = image
        self._txt = intro

    def test_hierarchy(self):
        # convenience
        eq = self.assertEqual
        raises = self.assertRaises
        # tests
        m = self._msg
        self.assertTrue(m.is_multipart())
        eq(m.get_content_type(), 'multipart/mixed')
        eq(len(m.get_payload()), 2)
        raises(IndexError, m.get_payload, 2)
        m0 = m.get_payload(0)
        m1 = m.get_payload(1)
        # The container must hand back the very objects that were attached.
        self.assertIs(m0, self._txt)
        self.assertIs(m1, self._im)
        eq(m.get_payload(), [m0, m1])
        self.assertFalse(m0.is_multipart())
        self.assertFalse(m1.is_multipart())

    def test_empty_multipart_idempotent(self):
        text = """\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY


--BOUNDARY--
"""
        msg = Parser().parsestr(text)
        self.ndiffAssertEqual(text, msg.as_string())

    def test_no_parts_in_a_multipart_with_none_epilogue(self):
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY

--BOUNDARY--''')

    def test_no_parts_in_a_multipart_with_empty_epilogue(self):
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = ''
        outer.epilogue = ''
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY

--BOUNDARY--
''')

    def test_one_part_in_a_multipart(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.set_boundary('BOUNDARY')
        msg = MIMEText('hello world')
        outer.attach(msg)
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_preamble(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_preamble(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = '\n'
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--

''')

    def test_message_external_body(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_36.txt')
        eq(len(msg.get_payload()), 2)
        msg1 = msg.get_payload(1)
        eq(msg1.get_content_type(), 'multipart/alternative')
        eq(len(msg1.get_payload()), 2)
        for subpart in msg1.get_payload():
            eq(subpart.get_content_type(), 'message/external-body')
            eq(len(subpart.get_payload()), 1)
            subsubpart = subpart.get_payload(0)
            eq(subsubpart.get_content_type(), 'text/plain')

    def test_double_boundary(self):
        # msg_37.txt is a multipart that contains two dash-boundary's in a
        # row.  Our interpretation of RFC 2046 calls for ignoring the second
        # and subsequent boundaries.
        msg = self._msgobj('msg_37.txt')
        self.assertEqual(len(msg.get_payload()), 3)

    def test_nested_inner_contains_outer_boundary(self):
        eq = self.ndiffAssertEqual
        # msg_38.txt has an inner part that contains outer boundaries.  My
        # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
        # these are illegal and should be interpreted as unterminated inner
        # parts.
        msg = self._msgobj('msg_38.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
            text/plain
        text/plain
    text/plain
    text/plain
""")

    def test_nested_with_same_boundary(self):
        eq = self.ndiffAssertEqual
        # msg 39.txt is similarly evil in that it's got inner parts that use
        # the same boundary as outer parts.  Again, I believe the way this is
        # parsed is closest to the spirit of RFC 2046
        msg = self._msgobj('msg_39.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
        application/octet-stream
        application/octet-stream
    text/plain
""")

    def test_boundary_in_non_multipart(self):
        msg = self._msgobj('msg_40.txt')
        self.assertEqual(msg.as_string(), '''\
MIME-Version: 1.0
Content-Type: text/html; boundary="--961284236552522269"

----961284236552522269
Content-Type: text/html;
Content-Transfer-Encoding: 7Bit

<html></html>

----961284236552522269--
''')

    def test_boundary_with_leading_space(self):
        eq = self.assertEqual
        msg = email.message_from_string('''\
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary=" XXXX"

-- XXXX
Content-Type: text/plain


-- XXXX
Content-Type: text/plain

-- XXXX--
''')
        self.assertTrue(msg.is_multipart())
        eq(msg.get_boundary(), ' XXXX')
        eq(len(msg.get_payload()), 2)

    def test_boundary_without_trailing_newline(self):
        m = Parser().parsestr("""\
Content-Type: multipart/mixed; boundary="===============0012394164=="
MIME-Version: 1.0

--===============0012394164==
Content-Type: image/file1.jpg
MIME-Version: 1.0
Content-Transfer-Encoding: base64

YXNkZg==
--===============0012394164==--""")
        self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001970
1971
Ezio Melottib3aedd42010-11-20 19:04:17 +00001972
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001973# Test some badly formatted messages
class TestNonConformant(TestEmailBase):
    """Parse badly formed messages and check the defects that are recorded."""

    def test_parse_missing_minor_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_14.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')

    # test_defect_handling
    def test_same_boundary_inner_outer(self):
        msg = self._msgobj('msg_15.txt')
        # XXX We can probably eventually do better
        inner = msg.get_payload(0)
        self.assertTrue(hasattr(inner, 'defects'))
        self.assertEqual(len(inner.defects), 1)
        # assertIsInstance names the actual defect class on failure.
        self.assertIsInstance(inner.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    # test_defect_handling
    def test_multipart_no_boundary(self):
        msg = self._msgobj('msg_25.txt')
        self.assertIsInstance(msg.get_payload(), str)
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    # Template for the three CTE tests below: the "{}" placeholder sits at
    # the end of the Content-Type header so an extra header line can be
    # spliced in with str.format().
    multipart_msg = textwrap.dedent("""\
        Date: Wed, 14 Nov 2007 12:56:23 GMT
        From: foo@bar.invalid
        To: foo@bar.invalid
        Subject: Content-Transfer-Encoding: base64 and multipart
        MIME-Version: 1.0
        Content-Type: multipart/mixed;
            boundary="===============3344438784458119861=="{}

        --===============3344438784458119861==
        Content-Type: text/plain

        Test message

        --===============3344438784458119861==
        Content-Type: application/octet-stream
        Content-Transfer-Encoding: base64

        YWJj

        --===============3344438784458119861==--
        """)

    # test_defect_handling
    def test_multipart_invalid_cte(self):
        msg = self._str_msg(
            self.multipart_msg.format("\nContent-Transfer-Encoding: base64"))
        self.assertEqual(len(msg.defects), 1)
        self.assertIsInstance(msg.defects[0],
            errors.InvalidMultipartContentTransferEncodingDefect)

    # test_defect_handling
    def test_multipart_no_cte_no_defect(self):
        msg = self._str_msg(self.multipart_msg.format(''))
        self.assertEqual(len(msg.defects), 0)

    # test_defect_handling
    def test_multipart_valid_cte_no_defect(self):
        for cte in ('7bit', '8bit', 'BINary'):
            msg = self._str_msg(
                self.multipart_msg.format(
                    "\nContent-Transfer-Encoding: {}".format(cte)))
            self.assertEqual(len(msg.defects), 0)

    # test_headerregistry.TestContentTypeHeader invalid_1 and invalid_2.
    def test_invalid_content_type(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        msg = Message()
        # RFC 2045, $5.2 says invalid yields text/plain
        msg['Content-Type'] = 'text'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Clear the old value and try something /really/ invalid
        del msg['content-type']
        msg['Content-Type'] = 'foo'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Still, make sure that the message is idempotently generated
        s = StringIO()
        g = Generator(s)
        g.flatten(msg)
        neq(s.getvalue(), 'Content-Type: foo\n\n')

    def test_no_start_boundary(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_31.txt')
        eq(msg.get_payload(), """\
--BOUNDARY
Content-Type: text/plain

message 1

--BOUNDARY
Content-Type: text/plain

message 2

--BOUNDARY--
""")

    def test_no_separating_blank_line(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_35.txt')
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: here's something interesting

counter to RFC 2822, there's no separating newline here
""")

    # test_defect_handling
    def test_lying_multipart(self):
        msg = self._msgobj('msg_41.txt')
        self.assertTrue(hasattr(msg, 'defects'))
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    # test_defect_handling
    def test_missing_start_boundary(self):
        outer = self._msgobj('msg_42.txt')
        # The message structure is:
        #
        # multipart/mixed
        #    text/plain
        #    message/rfc822
        #        multipart/mixed [*]
        #
        # [*] This message is missing its start boundary
        bad = outer.get_payload(1).get_payload(0)
        self.assertEqual(len(bad.defects), 1)
        self.assertIsInstance(bad.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    # test_defect_handling
    def test_first_line_is_continuation_header(self):
        eq = self.assertEqual
        m = ' Line 1\nSubject: test\n\nbody'
        msg = email.message_from_string(m)
        eq(msg.keys(), ['Subject'])
        eq(msg.get_payload(), 'body')
        eq(len(msg.defects), 1)
        self.assertDefectsEqual(msg.defects,
                                 [errors.FirstHeaderLineIsContinuationDefect])
        eq(msg.defects[0].line, ' Line 1\n')

    # test_defect_handling
    def test_missing_header_body_separator(self):
        # Our heuristic if we see a line that doesn't look like a header (no
        # leading whitespace but no ':') is to assume that the blank line that
        # separates the header from the body is missing, and to stop parsing
        # headers and start parsing the body.
        msg = self._str_msg('Subject: test\nnot a header\nTo: abc\n\nb\n')
        self.assertEqual(msg.keys(), ['Subject'])
        self.assertEqual(msg.get_payload(), 'not a header\nTo: abc\n\nb\n')
        self.assertDefectsEqual(msg.defects,
                                [errors.MissingHeaderBodySeparatorDefect])
2149
Ezio Melottib3aedd42010-11-20 19:04:17 +00002150
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002151# Test RFC 2047 header encoding and decoding
Guido van Rossum9604e662007-08-30 03:46:43 +00002152class TestRFC2047(TestEmailBase):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002153 def test_rfc2047_multiline(self):
2154 eq = self.assertEqual
2155 s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
2156 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
2157 dh = decode_header(s)
2158 eq(dh, [
R David Murray07ea53c2012-06-02 17:56:49 -04002159 (b'Re: ', None),
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002160 (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
R David Murray07ea53c2012-06-02 17:56:49 -04002161 (b' baz foo bar ', None),
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002162 (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
2163 header = make_header(dh)
2164 eq(str(header),
2165 'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
Barry Warsaw00b34222007-08-31 02:35:00 +00002166 self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
Guido van Rossum9604e662007-08-30 03:46:43 +00002167Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
2168 =?mac-iceland?q?=9Arg=8Cs?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002169
R David Murray07ea53c2012-06-02 17:56:49 -04002170 def test_whitespace_keeper_unicode(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002171 eq = self.assertEqual
2172 s = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
2173 dh = decode_header(s)
2174 eq(dh, [(b'Andr\xe9', 'iso-8859-1'),
R David Murray07ea53c2012-06-02 17:56:49 -04002175 (b' Pirard <pirard@dom.ain>', None)])
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002176 header = str(make_header(dh))
2177 eq(header, 'Andr\xe9 Pirard <pirard@dom.ain>')
2178
R David Murray07ea53c2012-06-02 17:56:49 -04002179 def test_whitespace_keeper_unicode_2(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002180 eq = self.assertEqual
2181 s = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?='
2182 dh = decode_header(s)
R David Murray07ea53c2012-06-02 17:56:49 -04002183 eq(dh, [(b'The ', None), (b'quick brown fox', 'iso-8859-1'),
2184 (b' jumped over the ', None), (b'lazy dog', 'iso-8859-1')])
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002185 hu = str(make_header(dh))
2186 eq(hu, 'The quick brown fox jumped over the lazy dog')
2187
2188 def test_rfc2047_missing_whitespace(self):
2189 s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
2190 dh = decode_header(s)
R David Murray07ea53c2012-06-02 17:56:49 -04002191 self.assertEqual(dh, [(b'Sm', None), (b'\xf6', 'iso-8859-1'),
2192 (b'rg', None), (b'\xe5', 'iso-8859-1'),
2193 (b'sbord', None)])
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002194
2195 def test_rfc2047_with_whitespace(self):
2196 s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
2197 dh = decode_header(s)
R David Murray07ea53c2012-06-02 17:56:49 -04002198 self.assertEqual(dh, [(b'Sm ', None), (b'\xf6', 'iso-8859-1'),
2199 (b' rg ', None), (b'\xe5', 'iso-8859-1'),
2200 (b' sbord', None)])
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002201
R. David Murrayc4e69cc2010-08-03 22:14:10 +00002202 def test_rfc2047_B_bad_padding(self):
2203 s = '=?iso-8859-1?B?%s?='
2204 data = [ # only test complete bytes
2205 ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
2206 ('dmk=', b'vi'), ('dmk', b'vi')
2207 ]
2208 for q, a in data:
2209 dh = decode_header(s % q)
2210 self.assertEqual(dh, [(a, 'iso-8859-1')])
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002211
R. David Murray31e984c2010-10-01 15:40:20 +00002212 def test_rfc2047_Q_invalid_digits(self):
2213 # issue 10004.
2214 s = '=?iso-8659-1?Q?andr=e9=zz?='
2215 self.assertEqual(decode_header(s),
2216 [(b'andr\xe9=zz', 'iso-8659-1')])
2217
R David Murray07ea53c2012-06-02 17:56:49 -04002218 def test_rfc2047_rfc2047_1(self):
2219 # 1st testcase at end of rfc2047
2220 s = '(=?ISO-8859-1?Q?a?=)'
2221 self.assertEqual(decode_header(s),
2222 [(b'(', None), (b'a', 'iso-8859-1'), (b')', None)])
2223
2224 def test_rfc2047_rfc2047_2(self):
2225 # 2nd testcase at end of rfc2047
2226 s = '(=?ISO-8859-1?Q?a?= b)'
2227 self.assertEqual(decode_header(s),
2228 [(b'(', None), (b'a', 'iso-8859-1'), (b' b)', None)])
2229
2230 def test_rfc2047_rfc2047_3(self):
2231 # 3rd testcase at end of rfc2047
2232 s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)'
2233 self.assertEqual(decode_header(s),
2234 [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])
2235
2236 def test_rfc2047_rfc2047_4(self):
2237 # 4th testcase at end of rfc2047
2238 s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)'
2239 self.assertEqual(decode_header(s),
2240 [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])
2241
2242 def test_rfc2047_rfc2047_5a(self):
2243 # 5th testcase at end of rfc2047 newline is \r\n
2244 s = '(=?ISO-8859-1?Q?a?=\r\n =?ISO-8859-1?Q?b?=)'
2245 self.assertEqual(decode_header(s),
2246 [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])
2247
2248 def test_rfc2047_rfc2047_5b(self):
2249 # 5th testcase at end of rfc2047 newline is \n
2250 s = '(=?ISO-8859-1?Q?a?=\n =?ISO-8859-1?Q?b?=)'
2251 self.assertEqual(decode_header(s),
2252 [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])
2253
2254 def test_rfc2047_rfc2047_6(self):
2255 # 6th testcase at end of rfc2047
2256 s = '(=?ISO-8859-1?Q?a_b?=)'
2257 self.assertEqual(decode_header(s),
2258 [(b'(', None), (b'a b', 'iso-8859-1'), (b')', None)])
2259
2260 def test_rfc2047_rfc2047_7(self):
2261 # 7th testcase at end of rfc2047
2262 s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)'
2263 self.assertEqual(decode_header(s),
2264 [(b'(', None), (b'a', 'iso-8859-1'), (b' b', 'iso-8859-2'),
2265 (b')', None)])
2266 self.assertEqual(make_header(decode_header(s)).encode(), s.lower())
2267 self.assertEqual(str(make_header(decode_header(s))), '(a b)')
2268
R David Murray82ffabd2012-06-03 12:27:07 -04002269 def test_multiline_header(self):
2270 s = '=?windows-1252?q?=22M=FCller_T=22?=\r\n <T.Mueller@xxx.com>'
2271 self.assertEqual(decode_header(s),
2272 [(b'"M\xfcller T"', 'windows-1252'),
2273 (b'<T.Mueller@xxx.com>', None)])
2274 self.assertEqual(make_header(decode_header(s)).encode(),
2275 ''.join(s.splitlines()))
2276 self.assertEqual(str(make_header(decode_header(s))),
2277 '"Müller T" <T.Mueller@xxx.com>')
2278
Ezio Melottib3aedd42010-11-20 19:04:17 +00002279
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002280# Test the MIMEMessage class
class TestMIMEMessage(TestEmailBase):
    """Tests for MIMEMessage and for message/* and multipart handling."""

    def setUp(self):
        # Keep the raw text of msg_11.txt on the instance.
        with openfile('msg_11.txt') as fp:
            self._text = fp.read()

    def test_type_error(self):
        # A plain string is not an acceptable payload for MIMEMessage.
        self.assertRaises(TypeError, MIMEMessage, 'a plain string')

    def test_valid_argument(self):
        eq = self.assertEqual
        subject = 'A sub-message'
        m = Message()
        m['Subject'] = subject
        r = MIMEMessage(m)
        eq(r.get_content_type(), 'message/rfc822')
        payload = r.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subpart = payload[0]
        # The wrapped message must be the very same object, not a copy.
        self.assertIs(subpart, m)
        eq(subpart['subject'], subject)

    def test_bad_multipart(self):
        # A MIMEMessage already holding one message refuses a second one.
        msg1 = Message()
        msg1['Subject'] = 'subpart 1'
        msg2 = Message()
        msg2['Subject'] = 'subpart 2'
        r = MIMEMessage(msg1)
        self.assertRaises(errors.MultipartConversionError, r.attach, msg2)

    def test_generate(self):
        # First craft the message to be encapsulated
        m = Message()
        m['Subject'] = 'An enclosed message'
        m.set_payload('Here is the body of the message.\n')
        r = MIMEMessage(m)
        r['Subject'] = 'The enclosing message'
        s = StringIO()
        g = Generator(s)
        g.flatten(r)
        self.assertEqual(s.getvalue(), """\
Content-Type: message/rfc822
MIME-Version: 1.0
Subject: The enclosing message

Subject: An enclosed message

Here is the body of the message.
""")

    def test_parse_message_rfc822(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_11.txt')
        eq(msg.get_content_type(), 'message/rfc822')
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        submsg = payload[0]
        self.assertIsInstance(submsg, Message)
        eq(submsg['subject'], 'An enclosed message')
        eq(submsg.get_payload(), 'Here is the body of the message.\n')

    def test_dsn(self):
        eq = self.assertEqual
        # msg 16 is a Delivery Status Notification, see RFC 1894
        msg = self._msgobj('msg_16.txt')
        eq(msg.get_content_type(), 'multipart/report')
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 3)
        # Subpart 1 is a text/plain, human readable section
        subpart = msg.get_payload(0)
        eq(subpart.get_content_type(), 'text/plain')
        # NOTE(review): the leading whitespace of the indented lines in
        # the expected text is reproduced as it appears in this copy of
        # the file; confirm it against msg_16.txt.
        eq(subpart.get_payload(), """\
This report relates to a message you sent with the following header fields:

 Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
 Date: Sun, 23 Sep 2001 20:10:55 -0700
 From: "Ian T. Henry" <henryi@oxy.edu>
 To: SoCal Raves <scr@socal-raves.org>
 Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

 Recipient address: jangel1@cougar.noc.ucla.edu
 Reason: recipient reached disk quota

""")
        # Subpart 2 contains the machine parsable DSN information.  It
        # consists of two blocks of headers, represented by two nested Message
        # objects.
        subpart = msg.get_payload(1)
        eq(subpart.get_content_type(), 'message/delivery-status')
        eq(len(subpart.get_payload()), 2)
        # message/delivery-status should treat each block as a bunch of
        # headers, i.e. a bunch of Message objects.
        dsn1 = subpart.get_payload(0)
        self.assertIsInstance(dsn1, Message)
        eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
        eq(dsn1.get_param('dns', header='reporting-mta'), '')
        # Try a missing one <wink>
        eq(dsn1.get_param('nsd', header='reporting-mta'), None)
        dsn2 = subpart.get_payload(1)
        self.assertIsInstance(dsn2, Message)
        eq(dsn2['action'], 'failed')
        eq(dsn2.get_params(header='original-recipient'),
           [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
        eq(dsn2.get_param('rfc822', header='final-recipient'), '')
        # Subpart 3 is the original message
        subpart = msg.get_payload(2)
        eq(subpart.get_content_type(), 'message/rfc822')
        payload = subpart.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subsubpart = payload[0]
        self.assertIsInstance(subsubpart, Message)
        eq(subsubpart.get_content_type(), 'text/plain')
        eq(subsubpart['message-id'],
           '<002001c144a6$8752e060$56104586@oxy.edu>')

    def test_epilogue(self):
        # A hand-built multipart with preamble and epilogue must flatten
        # to exactly the text stored in msg_21.txt.
        eq = self.ndiffAssertEqual
        with openfile('msg_21.txt') as fp:
            text = fp.read()
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = 'End of MIME message\n'
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), text)

    def test_no_nl_preamble(self):
        # The preamble has no trailing newline and the epilogue is empty.
        eq = self.ndiffAssertEqual
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = ''
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
""")

    def test_default_type(self):
        eq = self.assertEqual
        with openfile('msg_30.txt') as fp:
            msg = email.message_from_file(fp)
        container1 = msg.get_payload(0)
        eq(container1.get_default_type(), 'message/rfc822')
        eq(container1.get_content_type(), 'message/rfc822')
        container2 = msg.get_payload(1)
        eq(container2.get_default_type(), 'message/rfc822')
        eq(container2.get_content_type(), 'message/rfc822')
        container1a = container1.get_payload(0)
        eq(container1a.get_default_type(), 'text/plain')
        eq(container1a.get_content_type(), 'text/plain')
        container2a = container2.get_payload(0)
        eq(container2a.get_default_type(), 'text/plain')
        eq(container2a.get_content_type(), 'text/plain')

    def test_default_type_with_explicit_container_type(self):
        eq = self.assertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        container1 = msg.get_payload(0)
        eq(container1.get_default_type(), 'message/rfc822')
        eq(container1.get_content_type(), 'message/rfc822')
        container2 = msg.get_payload(1)
        eq(container2.get_default_type(), 'message/rfc822')
        eq(container2.get_content_type(), 'message/rfc822')
        container1a = container1.get_payload(0)
        eq(container1a.get_default_type(), 'text/plain')
        eq(container1a.get_content_type(), 'text/plain')
        container2a = container2.get_payload(0)
        eq(container2a.get_default_type(), 'text/plain')
        eq(container2a.get_content_type(), 'text/plain')

    def test_default_type_non_parsed(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # Set up container
        container = MIMEMultipart('digest', 'BOUNDARY')
        container.epilogue = ''
        # Set up subparts
        subpart1a = MIMEText('message 1\n')
        subpart2a = MIMEText('message 2\n')
        subpart1 = MIMEMessage(subpart1a)
        subpart2 = MIMEMessage(subpart2a)
        container.attach(subpart1)
        container.attach(subpart2)
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
        # With the explicit headers removed, the subparts must still
        # report message/rfc822 as both content type and default type.
        del subpart1['content-type']
        del subpart1['mime-version']
        del subpart2['content-type']
        del subpart2['mime-version']
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')

    def test_mime_attachments_in_constructor(self):
        # Subparts handed to the constructor become the payload, in order.
        eq = self.assertEqual
        text1 = MIMEText('')
        text2 = MIMEText('')
        msg = MIMEMultipart(_subparts=(text1, text2))
        eq(len(msg.get_payload()), 2)
        eq(msg.get_payload(0), text1)
        eq(msg.get_payload(1), text2)

    def test_default_multipart_constructor(self):
        # A MIMEMultipart built without arguments is already multipart.
        msg = MIMEMultipart()
        self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002578
Ezio Melottib3aedd42010-11-20 19:04:17 +00002579
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002580# A general test of parser->model->generator idempotency. IOW, read a message
2581# in, parse it into a message object tree, then without touching the tree,
2582# regenerate the plain text. The original text and the transformed text
# should be identical.  Note that we ignore the Unix-From since that may
2584# contain a changed date.
class TestIdempotent(TestEmailBase):
    """Check that parsing a message and regenerating it is lossless."""

    linesep = '\n'

    def _msgobj(self, filename):
        # Return both the parsed message and the raw text it came from.
        with openfile(filename) as fp:
            data = fp.read()
        msg = email.message_from_string(data)
        return msg, data

    def _idempotent(self, msg, text, unixfrom=False):
        # Flatten msg with header wrapping disabled (maxheaderlen=0) and
        # require the output to equal the original text.
        eq = self.ndiffAssertEqual
        s = StringIO()
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg, unixfrom=unixfrom)
        eq(text, s.getvalue())

    def test_parse_text_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_01.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_params()[1], ('charset', 'us-ascii'))
        eq(msg.get_param('charset'), 'us-ascii')
        eq(msg.preamble, None)
        eq(msg.epilogue, None)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_03.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_params(), None)
        eq(msg.get_param('charset'), None)
        self._idempotent(msg, text)

    def test_simple_multipart(self):
        msg, text = self._msgobj('msg_04.txt')
        self._idempotent(msg, text)

    def test_MIME_digest(self):
        msg, text = self._msgobj('msg_02.txt')
        self._idempotent(msg, text)

    def test_long_header(self):
        msg, text = self._msgobj('msg_27.txt')
        self._idempotent(msg, text)

    def test_MIME_digest_with_part_headers(self):
        msg, text = self._msgobj('msg_28.txt')
        self._idempotent(msg, text)

    def test_mixed_with_image(self):
        msg, text = self._msgobj('msg_06.txt')
        self._idempotent(msg, text)

    def test_multipart_report(self):
        msg, text = self._msgobj('msg_05.txt')
        self._idempotent(msg, text)

    def test_dsn(self):
        msg, text = self._msgobj('msg_16.txt')
        self._idempotent(msg, text)

    def test_preamble_epilogue(self):
        msg, text = self._msgobj('msg_21.txt')
        self._idempotent(msg, text)

    def test_multipart_one_part(self):
        msg, text = self._msgobj('msg_23.txt')
        self._idempotent(msg, text)

    def test_multipart_no_parts(self):
        msg, text = self._msgobj('msg_24.txt')
        self._idempotent(msg, text)

    def test_no_start_boundary(self):
        msg, text = self._msgobj('msg_31.txt')
        self._idempotent(msg, text)

    def test_rfc2231_charset(self):
        msg, text = self._msgobj('msg_32.txt')
        self._idempotent(msg, text)

    def test_more_rfc2231_parameters(self):
        msg, text = self._msgobj('msg_33.txt')
        self._idempotent(msg, text)

    def test_text_plain_in_a_multipart_digest(self):
        msg, text = self._msgobj('msg_34.txt')
        self._idempotent(msg, text)

    def test_nested_multipart_mixeds(self):
        msg, text = self._msgobj('msg_12a.txt')
        self._idempotent(msg, text)

    def test_message_external_body_idempotent(self):
        msg, text = self._msgobj('msg_36.txt')
        self._idempotent(msg, text)

    def test_message_delivery_status(self):
        # Unlike the other cases this one is flattened with unixfrom=True.
        msg, text = self._msgobj('msg_43.txt')
        self._idempotent(msg, text, unixfrom=True)

    def test_message_signed_idempotent(self):
        msg, text = self._msgobj('msg_45.txt')
        self._idempotent(msg, text)

    def test_content_type(self):
        eq = self.assertEqual
        # Parse the message; the raw text is not needed here.
        msg, text = self._msgobj('msg_05.txt')
        eq(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(msg.get_params())
        eq(params['report-type'], 'delivery-status')
        eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep)
        eq(msg.epilogue, self.linesep)
        eq(len(msg.get_payload()), 3)
        # Make sure the subparts are what we expect
        msg1 = msg.get_payload(0)
        eq(msg1.get_content_type(), 'text/plain')
        eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg2 = msg.get_payload(1)
        eq(msg2.get_content_type(), 'text/plain')
        eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg3 = msg.get_payload(2)
        eq(msg3.get_content_type(), 'message/rfc822')
        self.assertIsInstance(msg3, Message)
        payload = msg3.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg4 = payload[0]
        self.assertIsInstance(msg4, Message)
        eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep)

    def test_parser(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        eq(msg.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and that
        # that submessage has a type of text/plain
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg1 = payload[0]
        self.assertIsInstance(msg1, Message)
        eq(msg1.get_content_type(), 'text/plain')
        self.assertIsInstance(msg1.get_payload(), str)
        eq(msg1.get_payload(), self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002742
2743
Ezio Melottib3aedd42010-11-20 19:04:17 +00002744
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002745# Test various other bits of the package's functionality
2746class TestMiscellaneous(TestEmailBase):
2747 def test_message_from_string(self):
2748 with openfile('msg_01.txt') as fp:
2749 text = fp.read()
2750 msg = email.message_from_string(text)
2751 s = StringIO()
2752 # Don't wrap/continue long headers since we're trying to test
2753 # idempotency.
2754 g = Generator(s, maxheaderlen=0)
2755 g.flatten(msg)
2756 self.assertEqual(text, s.getvalue())
2757
2758 def test_message_from_file(self):
2759 with openfile('msg_01.txt') as fp:
2760 text = fp.read()
2761 fp.seek(0)
2762 msg = email.message_from_file(fp)
2763 s = StringIO()
2764 # Don't wrap/continue long headers since we're trying to test
2765 # idempotency.
2766 g = Generator(s, maxheaderlen=0)
2767 g.flatten(msg)
2768 self.assertEqual(text, s.getvalue())
2769
2770 def test_message_from_string_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002771 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002772 with openfile('msg_01.txt') as fp:
2773 text = fp.read()
2774
2775 # Create a subclass
2776 class MyMessage(Message):
2777 pass
2778
2779 msg = email.message_from_string(text, MyMessage)
2780 unless(isinstance(msg, MyMessage))
2781 # Try something more complicated
2782 with openfile('msg_02.txt') as fp:
2783 text = fp.read()
2784 msg = email.message_from_string(text, MyMessage)
2785 for subpart in msg.walk():
2786 unless(isinstance(subpart, MyMessage))
2787
2788 def test_message_from_file_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002789 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002790 # Create a subclass
2791 class MyMessage(Message):
2792 pass
2793
2794 with openfile('msg_01.txt') as fp:
2795 msg = email.message_from_file(fp, MyMessage)
2796 unless(isinstance(msg, MyMessage))
2797 # Try something more complicated
2798 with openfile('msg_02.txt') as fp:
2799 msg = email.message_from_file(fp, MyMessage)
2800 for subpart in msg.walk():
2801 unless(isinstance(subpart, MyMessage))
2802
R David Murrayc27e5222012-05-25 15:01:48 -04002803 def test_custom_message_does_not_require_arguments(self):
2804 class MyMessage(Message):
2805 def __init__(self):
2806 super().__init__()
2807 msg = self._str_msg("Subject: test\n\ntest", MyMessage)
2808 self.assertTrue(isinstance(msg, MyMessage))
2809
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002810 def test__all__(self):
2811 module = __import__('email')
R David Murray1b6c7242012-03-16 22:43:05 -04002812 self.assertEqual(sorted(module.__all__), [
2813 'base64mime', 'charset', 'encoders', 'errors', 'feedparser',
2814 'generator', 'header', 'iterators', 'message',
2815 'message_from_binary_file', 'message_from_bytes',
2816 'message_from_file', 'message_from_string', 'mime', 'parser',
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002817 'quoprimime', 'utils',
2818 ])
2819
2820 def test_formatdate(self):
2821 now = time.time()
2822 self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
2823 time.gmtime(now)[:6])
2824
2825 def test_formatdate_localtime(self):
2826 now = time.time()
2827 self.assertEqual(
2828 utils.parsedate(utils.formatdate(now, localtime=True))[:6],
2829 time.localtime(now)[:6])
2830
2831 def test_formatdate_usegmt(self):
2832 now = time.time()
2833 self.assertEqual(
2834 utils.formatdate(now, localtime=False),
2835 time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
2836 self.assertEqual(
2837 utils.formatdate(now, localtime=False, usegmt=True),
2838 time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
2839
Georg Brandl1aca31e2012-09-22 09:03:56 +02002840 # parsedate and parsedate_tz will become deprecated interfaces someday
2841 def test_parsedate_returns_None_for_invalid_strings(self):
2842 self.assertIsNone(utils.parsedate(''))
2843 self.assertIsNone(utils.parsedate_tz(''))
2844 self.assertIsNone(utils.parsedate('0'))
2845 self.assertIsNone(utils.parsedate_tz('0'))
2846 self.assertIsNone(utils.parsedate('A Complete Waste of Time'))
2847 self.assertIsNone(utils.parsedate_tz('A Complete Waste of Time'))
2848 # Not a part of the spec but, but this has historically worked:
2849 self.assertIsNone(utils.parsedate(None))
2850 self.assertIsNone(utils.parsedate_tz(None))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002851
2852 def test_parsedate_compact(self):
2853 # The FWS after the comma is optional
2854 self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
2855 utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
2856
2857 def test_parsedate_no_dayofweek(self):
2858 eq = self.assertEqual
2859 eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
2860 (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
2861
2862 def test_parsedate_compact_no_dayofweek(self):
2863 eq = self.assertEqual
2864 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
2865 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2866
R. David Murray4a62e892010-12-23 20:35:46 +00002867 def test_parsedate_no_space_before_positive_offset(self):
2868 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
2869 (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
2870
2871 def test_parsedate_no_space_before_negative_offset(self):
2872 # Issue 1155362: we already handled '+' for this case.
2873 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
2874 (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))
2875
2876
R David Murrayaccd1c02011-03-13 20:06:23 -04002877 def test_parsedate_accepts_time_with_dots(self):
2878 eq = self.assertEqual
2879 eq(utils.parsedate_tz('5 Feb 2003 13.47.26 -0800'),
2880 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2881 eq(utils.parsedate_tz('5 Feb 2003 13.47 -0800'),
2882 (2003, 2, 5, 13, 47, 0, 0, 1, -1, -28800))
2883
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002884 def test_parsedate_acceptable_to_time_functions(self):
2885 eq = self.assertEqual
2886 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
2887 t = int(time.mktime(timetup))
2888 eq(time.localtime(t)[:6], timetup[:6])
2889 eq(int(time.strftime('%Y', timetup)), 2003)
2890 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
2891 t = int(time.mktime(timetup[:9]))
2892 eq(time.localtime(t)[:6], timetup[:6])
2893 eq(int(time.strftime('%Y', timetup[:9])), 2003)
2894
Alexander Belopolskya07548e2012-06-21 20:34:09 -04002895 def test_mktime_tz(self):
2896 self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
2897 -1, -1, -1, 0)), 0)
2898 self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
2899 -1, -1, -1, 1234)), -1234)
2900
R. David Murray219d1c82010-08-25 00:45:55 +00002901 def test_parsedate_y2k(self):
2902 """Test for parsing a date with a two-digit year.
2903
2904 Parsing a date with a two-digit year should return the correct
2905 four-digit year. RFC822 allows two-digit years, but RFC2822 (which
2906 obsoletes RFC822) requires four-digit years.
2907
2908 """
2909 self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
2910 utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
2911 self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
2912 utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
2913
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002914 def test_parseaddr_empty(self):
2915 self.assertEqual(utils.parseaddr('<>'), ('', ''))
2916 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
2917
2918 def test_noquote_dump(self):
2919 self.assertEqual(
2920 utils.formataddr(('A Silly Person', 'person@dom.ain')),
2921 'A Silly Person <person@dom.ain>')
2922
2923 def test_escape_dump(self):
2924 self.assertEqual(
2925 utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
R David Murrayb53319f2012-03-14 15:31:47 -04002926 r'"A (Very) Silly Person" <person@dom.ain>')
2927 self.assertEqual(
2928 utils.parseaddr(r'"A \(Very\) Silly Person" <person@dom.ain>'),
2929 ('A (Very) Silly Person', 'person@dom.ain'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002930 a = r'A \(Special\) Person'
2931 b = 'person@dom.ain'
2932 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2933
2934 def test_escape_backslashes(self):
2935 self.assertEqual(
2936 utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
2937 r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
2938 a = r'Arthur \Backslash\ Foobar'
2939 b = 'person@dom.ain'
2940 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2941
R David Murray8debacb2011-04-06 09:35:57 -04002942 def test_quotes_unicode_names(self):
2943 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2944 name = "H\u00e4ns W\u00fcrst"
2945 addr = 'person@dom.ain'
2946 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
2947 latin1_quopri = "=?iso-8859-1?q?H=E4ns_W=FCrst?= <person@dom.ain>"
2948 self.assertEqual(utils.formataddr((name, addr)), utf8_base64)
2949 self.assertEqual(utils.formataddr((name, addr), 'iso-8859-1'),
2950 latin1_quopri)
2951
2952 def test_accepts_any_charset_like_object(self):
2953 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2954 name = "H\u00e4ns W\u00fcrst"
2955 addr = 'person@dom.ain'
2956 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
2957 foobar = "FOOBAR"
2958 class CharsetMock:
2959 def header_encode(self, string):
2960 return foobar
2961 mock = CharsetMock()
2962 mock_expected = "%s <%s>" % (foobar, addr)
2963 self.assertEqual(utils.formataddr((name, addr), mock), mock_expected)
2964 self.assertEqual(utils.formataddr((name, addr), Charset('utf-8')),
2965 utf8_base64)
2966
2967 def test_invalid_charset_like_object_raises_error(self):
2968 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2969 name = "H\u00e4ns W\u00fcrst"
2970 addr = 'person@dom.ain'
2971 # A object without a header_encode method:
2972 bad_charset = object()
2973 self.assertRaises(AttributeError, utils.formataddr, (name, addr),
2974 bad_charset)
2975
2976 def test_unicode_address_raises_error(self):
2977 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2978 addr = 'pers\u00f6n@dom.in'
2979 self.assertRaises(UnicodeError, utils.formataddr, (None, addr))
2980 self.assertRaises(UnicodeError, utils.formataddr, ("Name", addr))
2981
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002982 def test_name_with_dot(self):
2983 x = 'John X. Doe <jxd@example.com>'
2984 y = '"John X. Doe" <jxd@example.com>'
2985 a, b = ('John X. Doe', 'jxd@example.com')
2986 self.assertEqual(utils.parseaddr(x), (a, b))
2987 self.assertEqual(utils.parseaddr(y), (a, b))
2988 # formataddr() quotes the name if there's a dot in it
2989 self.assertEqual(utils.formataddr((a, b)), y)
2990
R. David Murray5397e862010-10-02 15:58:26 +00002991 def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
2992 # issue 10005. Note that in the third test the second pair of
2993 # backslashes is not actually a quoted pair because it is not inside a
2994 # comment or quoted string: the address being parsed has a quoted
2995 # string containing a quoted backslash, followed by 'example' and two
2996 # backslashes, followed by another quoted string containing a space and
2997 # the word 'example'. parseaddr copies those two backslashes
2998 # literally. Per rfc5322 this is not technically correct since a \ may
2999 # not appear in an address outside of a quoted string. It is probably
3000 # a sensible Postel interpretation, though.
3001 eq = self.assertEqual
3002 eq(utils.parseaddr('""example" example"@example.com'),
3003 ('', '""example" example"@example.com'))
3004 eq(utils.parseaddr('"\\"example\\" example"@example.com'),
3005 ('', '"\\"example\\" example"@example.com'))
3006 eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
3007 ('', '"\\\\"example\\\\" example"@example.com'))
3008
R. David Murray63563cd2010-12-18 18:25:38 +00003009 def test_parseaddr_preserves_spaces_in_local_part(self):
3010 # issue 9286. A normal RFC5322 local part should not contain any
3011 # folding white space, but legacy local parts can (they are a sequence
3012 # of atoms, not dotatoms). On the other hand we strip whitespace from
3013 # before the @ and around dots, on the assumption that the whitespace
3014 # around the punctuation is a mistake in what would otherwise be
3015 # an RFC5322 local part. Leading whitespace is, usual, stripped as well.
3016 self.assertEqual(('', "merwok wok@xample.com"),
3017 utils.parseaddr("merwok wok@xample.com"))
3018 self.assertEqual(('', "merwok wok@xample.com"),
3019 utils.parseaddr("merwok wok@xample.com"))
3020 self.assertEqual(('', "merwok wok@xample.com"),
3021 utils.parseaddr(" merwok wok @xample.com"))
3022 self.assertEqual(('', 'merwok"wok" wok@xample.com'),
3023 utils.parseaddr('merwok"wok" wok@xample.com'))
3024 self.assertEqual(('', 'merwok.wok.wok@xample.com'),
3025 utils.parseaddr('merwok. wok . wok@xample.com'))
3026
R David Murrayb53319f2012-03-14 15:31:47 -04003027 def test_formataddr_does_not_quote_parens_in_quoted_string(self):
3028 addr = ("'foo@example.com' (foo@example.com)",
3029 'foo@example.com')
3030 addrstr = ('"\'foo@example.com\' '
3031 '(foo@example.com)" <foo@example.com>')
3032 self.assertEqual(utils.parseaddr(addrstr), addr)
3033 self.assertEqual(utils.formataddr(addr), addrstr)
3034
3035
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003036 def test_multiline_from_comment(self):
3037 x = """\
3038Foo
3039\tBar <foo@example.com>"""
3040 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
3041
3042 def test_quote_dump(self):
3043 self.assertEqual(
3044 utils.formataddr(('A Silly; Person', 'person@dom.ain')),
3045 r'"A Silly; Person" <person@dom.ain>')
3046
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003047 def test_charset_richcomparisons(self):
3048 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003049 ne = self.assertNotEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003050 cset1 = Charset()
3051 cset2 = Charset()
3052 eq(cset1, 'us-ascii')
3053 eq(cset1, 'US-ASCII')
3054 eq(cset1, 'Us-AsCiI')
3055 eq('us-ascii', cset1)
3056 eq('US-ASCII', cset1)
3057 eq('Us-AsCiI', cset1)
3058 ne(cset1, 'usascii')
3059 ne(cset1, 'USASCII')
3060 ne(cset1, 'UsAsCiI')
3061 ne('usascii', cset1)
3062 ne('USASCII', cset1)
3063 ne('UsAsCiI', cset1)
3064 eq(cset1, cset2)
3065 eq(cset2, cset1)
3066
3067 def test_getaddresses(self):
3068 eq = self.assertEqual
3069 eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
3070 'Bud Person <bperson@dom.ain>']),
3071 [('Al Person', 'aperson@dom.ain'),
3072 ('Bud Person', 'bperson@dom.ain')])
3073
3074 def test_getaddresses_nasty(self):
3075 eq = self.assertEqual
3076 eq(utils.getaddresses(['foo: ;']), [('', '')])
3077 eq(utils.getaddresses(
3078 ['[]*-- =~$']),
3079 [('', ''), ('', ''), ('', '*--')])
3080 eq(utils.getaddresses(
3081 ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
3082 [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
3083
3084 def test_getaddresses_embedded_comment(self):
3085 """Test proper handling of a nested comment"""
3086 eq = self.assertEqual
3087 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
3088 eq(addrs[0][1], 'foo@bar.com')
3089
3090 def test_utils_quote_unquote(self):
3091 eq = self.assertEqual
3092 msg = Message()
3093 msg.add_header('content-disposition', 'attachment',
3094 filename='foo\\wacky"name')
3095 eq(msg.get_filename(), 'foo\\wacky"name')
3096
3097 def test_get_body_encoding_with_bogus_charset(self):
3098 charset = Charset('not a charset')
3099 self.assertEqual(charset.get_body_encoding(), 'base64')
3100
3101 def test_get_body_encoding_with_uppercase_charset(self):
3102 eq = self.assertEqual
3103 msg = Message()
3104 msg['Content-Type'] = 'text/plain; charset=UTF-8'
3105 eq(msg['content-type'], 'text/plain; charset=UTF-8')
3106 charsets = msg.get_charsets()
3107 eq(len(charsets), 1)
3108 eq(charsets[0], 'utf-8')
3109 charset = Charset(charsets[0])
3110 eq(charset.get_body_encoding(), 'base64')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00003111 msg.set_payload(b'hello world', charset=charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003112 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
3113 eq(msg.get_payload(decode=True), b'hello world')
3114 eq(msg['content-transfer-encoding'], 'base64')
3115 # Try another one
3116 msg = Message()
3117 msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
3118 charsets = msg.get_charsets()
3119 eq(len(charsets), 1)
3120 eq(charsets[0], 'us-ascii')
3121 charset = Charset(charsets[0])
3122 eq(charset.get_body_encoding(), encoders.encode_7or8bit)
3123 msg.set_payload('hello world', charset=charset)
3124 eq(msg.get_payload(), 'hello world')
3125 eq(msg['content-transfer-encoding'], '7bit')
3126
3127 def test_charsets_case_insensitive(self):
3128 lc = Charset('us-ascii')
3129 uc = Charset('US-ASCII')
3130 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
3131
def test_partial_falls_inside_message_delivery_status(self):
    eq = self.ndiffAssertEqual
    # The Parser interface provides chunks of data to FeedParser in 8192
    # byte gulps.  SF bug #1076485 found one of those chunks inside
    # message/delivery-status header block, which triggered an
    # unreadline() of NeedMoreData.
    msg = self._msgobj('msg_43.txt')
    sfp = StringIO()
    iterators._structure(msg, sfp)
    # _structure() indents each nesting level by four spaces, so the
    # expected outline is spelled out with explicit indentation instead of
    # relying on the layout of a multi-line literal.
    # NOTE(review): the nesting below (26 text/plain parts inside
    # message/delivery-status, text/rfc822-headers back at the first
    # level) is reconstructed -- confirm against msg_43.txt.
    level1 = ' ' * 4
    level2 = ' ' * 8
    outline = (
        ['multipart/report',
         level1 + 'text/plain',
         level1 + 'message/delivery-status']
        + [level2 + 'text/plain'] * 26
        + [level1 + 'text/rfc822-headers']
    )
    eq(sfp.getvalue(), ''.join(line + '\n' for line in outline))
3173
R. David Murraya0b44b52010-12-02 21:47:19 +00003174 def test_make_msgid_domain(self):
3175 self.assertEqual(
3176 email.utils.make_msgid(domain='testdomain-string')[-19:],
3177 '@testdomain-string>')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003178
def test_Generator_linend(self):
    # Issue 14645.  Parse the text as read from the file (newline='\n'
    # leaves its line endings untranslated) and check that the default
    # flatten() output equals that text with '\r\n' replaced by '\n'.
    with openfile('msg_26.txt', newline='\n') as f:
        crlf_text = f.read()
    lf_text = crlf_text.replace('\r\n', '\n')
    sink = StringIO()
    Generator(sink).flatten(email.message_from_string(crlf_text))
    self.assertEqual(sink.getvalue(), lf_text)
3189
def test_BytesGenerator_linend(self):
    # Issue 14645.  Parse the '\n'-only form of the text and check that
    # flattening with linesep='\r\n' reproduces the text as read from the
    # file.
    with openfile('msg_26.txt', newline='\n') as f:
        crlf_text = f.read()
    lf_text = crlf_text.replace('\r\n', '\n')
    sink = BytesIO()
    BytesGenerator(sink).flatten(email.message_from_string(lf_text),
                                 linesep='\r\n')
    self.assertEqual(sink.getvalue().decode('ascii'), crlf_text)
3200
def test_BytesGenerator_linend_with_non_ascii(self):
    # Issue 14645.  Same round trip as the ASCII case, but done on bytes
    # with a non-ASCII byte substituted into the text first.
    with openfile('msg_26.txt', 'rb') as f:
        crlf_bytes = f.read().replace(b'with attachment', b'fo\xf6')
    lf_bytes = crlf_bytes.replace(b'\r\n', b'\n')
    sink = BytesIO()
    BytesGenerator(sink).flatten(email.message_from_bytes(lf_bytes),
                                 linesep='\r\n')
    self.assertEqual(sink.getvalue(), crlf_bytes)
3212
Ezio Melottib3aedd42010-11-20 19:04:17 +00003213
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003214# Test the iterator/generators
class TestIterators(TestEmailBase):
    # Tests for the helpers in email.iterators, plus a regression test for
    # the feed parser's line buffering.

    def test_body_line_iterator(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # First a simple non-multipart message
        msg = self._msgobj('msg_01.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 6)
        neq(EMPTYSTRING.join(lines), msg.get_payload())
        # Now a more complicated multipart
        msg = self._msgobj('msg_02.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 43)
        with openfile('msg_19.txt') as fp:
            neq(EMPTYSTRING.join(lines), fp.read())

    def test_typed_subpart_iterator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_04.txt')
        # Collect the payload of every subpart whose main type is 'text'.
        payloads = [subpart.get_payload()
                    for subpart in iterators.typed_subpart_iterator(msg, 'text')]
        eq(len(payloads), 2)
        eq(EMPTYSTRING.join(payloads),
           'a simple kind of mirror\n'
           'to reflect upon our own\n'
           'a simple kind of mirror\n'
           'to reflect upon our own\n')

    def test_typed_subpart_iterator_default_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_03.txt')
        # Same as above, but filtering on both main type and subtype.
        payloads = [subpart.get_payload()
                    for subpart in iterators.typed_subpart_iterator(
                        msg, 'text', 'plain')]
        eq(len(payloads), 1)
        eq(EMPTYSTRING.join(payloads),
           '\n'
           'Hi,\n'
           '\n'
           'Do you like this message?\n'
           '\n'
           '-Me\n')

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
           line endings.  A CR ending one push() followed by a LF starting
           the next push() added an empty line.
        '''
        # Each entry is (chunk to push, number of complete lines that
        # become readable right after pushing it).
        imt = [
            ("a\r \n", 2),
            ("b", 0),
            ("c\n", 1),
            ("", 0),
            ("d\r\n", 1),
            ("e\r", 0),
            ("\nf", 1),
            ("\r\n", 1),
        ]
        from email.feedparser import BufferedSubFile, NeedMoreData
        bsf = BufferedSubFile()
        om = []
        nt = 0
        for il, n in imt:
            bsf.push(il)
            nt += n
            n1 = 0
            # Drain every line that is currently available; readline()
            # hands back NeedMoreData once the buffer has no full line.
            for ol in iter(bsf.readline, NeedMoreData):
                om.append(ol)
                n1 += 1
            # assertEqual (rather than assertTrue(a == b)) reports both
            # values when the counts disagree.
            self.assertEqual(n, n1)
        self.assertEqual(len(om), nt)
        self.assertEqual(''.join([il for il, n in imt]), ''.join(om))
3301
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003302
Ezio Melottib3aedd42010-11-20 19:04:17 +00003303
class TestParsers(TestEmailBase):
    # Tests for the str and bytes parsers: header-only parsing, header
    # continuation handling, CRLF handling and rfc2822 field-name syntax.

    # Per the unittest documentation, None lifts the length limit on the
    # diffs shown for failed comparisons.
    maxDiff = None

    def test_header_parser(self):
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt') as fp:
            msg = HeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        # The body is left unparsed, so the payload is one string.
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)

    def test_bytes_header_parser(self):
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt', 'rb') as fp:
            msg = email.parser.BytesHeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)
        self.assertIsInstance(msg.get_payload(decode=True), bytes)

    def test_whitespace_continuation(self):
        eq = self.assertEqual
        # This message contains a line after the Subject: header that has only
        # whitespace, but it is not empty!
        msg = email.message_from_string(
            'From: aperson@dom.ain\n'
            'To: bperson@dom.ain\n'
            'Subject: the next line has a space on it\n'
            '\x20\n'
            'Date: Mon, 8 Apr 2002 15:09:19 -0400\n'
            'Message-ID: spam\n'
            '\n'
            "Here's the message body\n")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        eq = self.assertEqual
        # Like the previous test, but the subject line is the last
        # header.
        msg = email.message_from_string(
            'From: aperson@dom.ain\n'
            'To: bperson@dom.ain\n'
            'Date: Mon, 8 Apr 2002 15:09:19 -0400\n'
            'Message-ID: spam\n'
            'Subject: the next line has a space on it\n'
            '\x20\n'
            '\n'
            "Here's the message body\n")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        eq = self.assertEqual
        with openfile('msg_26.txt', newline='\n') as fp:
            msg = Parser().parse(fp)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'text/plain')
        eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'application/riscos')

    def test_crlf_flatten(self):
        # Using newline='\n' preserves the crlfs in this input file.
        with openfile('msg_26.txt', newline='\n') as fp:
            text = fp.read()
        msg = email.message_from_string(text)
        s = StringIO()
        g = Generator(s)
        g.flatten(msg, linesep='\r\n')
        self.assertEqual(s.getvalue(), text)

    def test_multipart_digest_with_extra_mime_headers(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        eq(msg.is_multipart(), 1)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'message/rfc822')
        eq(part1.is_multipart(), 1)
        eq(len(part1.get_payload()), 1)
        part1a = part1.get_payload(0)
        eq(part1a.is_multipart(), 0)
        eq(part1a.get_content_type(), 'text/plain')
        neq(part1a.get_payload(), 'message 1\n')
        # next message/rfc822
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'message/rfc822')
        eq(part2.is_multipart(), 1)
        eq(len(part2.get_payload()), 1)
        part2a = part2.get_payload(0)
        eq(part2a.is_multipart(), 0)
        eq(part2a.get_content_type(), 'text/plain')
        neq(part2a.get_payload(), 'message 2\n')

    def test_three_lines(self):
        # A bug report by Andrew McNamara
        lines = ['From: Andrew Person <aperson@dom.ain',
                 'Subject: Test',
                 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
        msg = email.message_from_string(NL.join(lines))
        self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        eq = self.assertEqual
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        value1 = 'text'
        value2 = 'more text'
        m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            value1, value2)
        msg = email.message_from_string(m)
        eq(msg.get('Header'), value1)
        eq(msg.get('Next-Header'), value2)

    def test_rfc2822_header_syntax(self):
        eq = self.assertEqual
        m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg), 3)
        # Iterating a message yields its header names.
        eq(sorted(msg), ['!"#QUX;~', '>From', 'From'])
        eq(msg.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        eq = self.assertEqual
        m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg.keys()), 0)

    def test_rfc2822_one_character_header(self):
        eq = self.assertEqual
        m = 'A: first header\nB: second header\nCC: third header\n\nbody'
        msg = email.message_from_string(m)
        eq(sorted(msg.keys()), ['A', 'B', 'CC'])
        eq(msg.get_payload(), 'body')

    def test_CRLFLF_at_end_of_part(self):
        # issue 5610: feedparser should not eat two chars from body part ending
        # with "\r\n\n".
        m = (
            "From: foo@bar.com\n"
            "To: baz\n"
            "Mime-Version: 1.0\n"
            "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
            "\n"
            "--BOUNDARY\n"
            "Content-Type: text/plain\n"
            "\n"
            "body ending with CRLF newline\r\n"
            "\n"
            "--BOUNDARY--\n"
        )
        msg = email.message_from_string(m)
        self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003479
Ezio Melottib3aedd42010-11-20 19:04:17 +00003480
R. David Murray96fd54e2010-10-08 15:55:28 +00003481class Test8BitBytesHandling(unittest.TestCase):
3482 # In Python3 all input is string, but that doesn't work if the actual input
3483 # uses an 8bit transfer encoding. To hack around that, in email 5.1 we
3484 # decode byte streams using the surrogateescape error handler, and
3485 # reconvert to binary at appropriate places if we detect surrogates. This
3486 # doesn't allow us to transform headers with 8bit bytes (they get munged),
3487 # but it does allow us to parse and preserve them, and to decode body
3488 # parts that use an 8bit CTE.
3489
# Template for a single-part text message; the tests fill in {charset},
# {cte} and {bodyline} with str.format().
bodytest_msg = '\n'.join([
    'From: foo@bar.com',
    'To: baz',
    'Mime-Version: 1.0',
    'Content-Type: text/plain; charset={charset}',
    'Content-Transfer-Encoding: {cte}',
    '',
    '{bodyline}',
    '',
])
3499
3500 def test_known_8bit_CTE(self):
3501 m = self.bodytest_msg.format(charset='utf-8',
3502 cte='8bit',
3503 bodyline='pöstal').encode('utf-8')
3504 msg = email.message_from_bytes(m)
3505 self.assertEqual(msg.get_payload(), "pöstal\n")
3506 self.assertEqual(msg.get_payload(decode=True),
3507 "pöstal\n".encode('utf-8'))
3508
3509 def test_unknown_8bit_CTE(self):
3510 m = self.bodytest_msg.format(charset='notavalidcharset',
3511 cte='8bit',
3512 bodyline='pöstal').encode('utf-8')
3513 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00003514 self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n")
R. David Murray96fd54e2010-10-08 15:55:28 +00003515 self.assertEqual(msg.get_payload(decode=True),
3516 "pöstal\n".encode('utf-8'))
3517
3518 def test_8bit_in_quopri_body(self):
3519 # This is non-RFC compliant data...without 'decode' the library code
3520 # decodes the body using the charset from the headers, and because the
3521 # source byte really is utf-8 this works. This is likely to fail
3522 # against real dirty data (ie: produce mojibake), but the data is
3523 # invalid anyway so it is as good a guess as any. But this means that
3524 # this test just confirms the current behavior; that behavior is not
3525 # necessarily the best possible behavior. With 'decode' it is
3526 # returning the raw bytes, so that test should be of correct behavior,
3527 # or at least produce the same result that email4 did.
3528 m = self.bodytest_msg.format(charset='utf-8',
3529 cte='quoted-printable',
3530 bodyline='p=C3=B6stál').encode('utf-8')
3531 msg = email.message_from_bytes(m)
3532 self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
3533 self.assertEqual(msg.get_payload(decode=True),
3534 'pöstál\n'.encode('utf-8'))
3535
3536 def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
3537 # This is similar to the previous test, but proves that if the 8bit
3538 # byte is undecodeable in the specified charset, it gets replaced
3539 # by the unicode 'unknown' character. Again, this may or may not
3540 # be the ideal behavior. Note that if decode=False none of the
3541 # decoders will get involved, so this is the only test we need
3542 # for this behavior.
3543 m = self.bodytest_msg.format(charset='ascii',
3544 cte='quoted-printable',
3545 bodyline='p=C3=B6stál').encode('utf-8')
3546 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00003547 self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
R. David Murray96fd54e2010-10-08 15:55:28 +00003548 self.assertEqual(msg.get_payload(decode=True),
3549 'pöstál\n'.encode('utf-8'))
3550
R David Murray80e0aee2012-05-27 21:23:34 -04003551 # test_defect_handling:test_invalid_chars_in_base64_payload
R. David Murray96fd54e2010-10-08 15:55:28 +00003552 def test_8bit_in_base64_body(self):
R David Murray80e0aee2012-05-27 21:23:34 -04003553 # If we get 8bit bytes in a base64 body, we can just ignore them
3554 # as being outside the base64 alphabet and decode anyway. But
3555 # we register a defect.
R. David Murray96fd54e2010-10-08 15:55:28 +00003556 m = self.bodytest_msg.format(charset='utf-8',
3557 cte='base64',
3558 bodyline='cMO2c3RhbAá=').encode('utf-8')
3559 msg = email.message_from_bytes(m)
3560 self.assertEqual(msg.get_payload(decode=True),
R David Murray80e0aee2012-05-27 21:23:34 -04003561 'pöstal'.encode('utf-8'))
3562 self.assertIsInstance(msg.defects[0],
3563 errors.InvalidBase64CharactersDefect)
R. David Murray96fd54e2010-10-08 15:55:28 +00003564
3565 def test_8bit_in_uuencode_body(self):
3566 # Sticking an 8bit byte in a uuencode block makes it undecodable by
3567 # normal means, so the block is returned undecoded, but as bytes.
3568 m = self.bodytest_msg.format(charset='utf-8',
3569 cte='uuencode',
3570 bodyline='<,.V<W1A; á ').encode('utf-8')
3571 msg = email.message_from_bytes(m)
3572 self.assertEqual(msg.get_payload(decode=True),
3573 '<,.V<W1A; á \n'.encode('utf-8'))
3574
3575
# Each entry pairs a raw header line with the (name, value) that the
# header tests expect for it when the message is printed.
headertest_headers = (
    ('From: foo@bar.com',
     ('From', 'foo@bar.com')),
    ('To: báz',
     ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
    ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
     '\tJean de Baddie',
     ('Subject',
      '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
      'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
      ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
    ('From: göst',
     ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
)
# The raw header lines joined by newlines, followed directly by one body
# line, encoded as utf-8.
headertest_msg = (
    '\n'.join([raw_line for (raw_line, _) in headertest_headers])
    + '\nYes, they are flying.\n'
).encode('utf-8')
R. David Murray96fd54e2010-10-08 15:55:28 +00003588
3589 def test_get_8bit_header(self):
3590 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003591 self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz')
3592 self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz')
R. David Murray96fd54e2010-10-08 15:55:28 +00003593
3594 def test_print_8bit_headers(self):
3595 msg = email.message_from_bytes(self.headertest_msg)
3596 self.assertEqual(str(msg),
R. David Murray92532142011-01-07 23:25:30 +00003597 textwrap.dedent("""\
3598 From: {}
3599 To: {}
3600 Subject: {}
3601 From: {}
3602
3603 Yes, they are flying.
3604 """).format(*[expected[1] for (_, expected) in
3605 self.headertest_headers]))
R. David Murray96fd54e2010-10-08 15:55:28 +00003606
3607 def test_values_with_8bit_headers(self):
3608 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003609 self.assertListEqual([str(x) for x in msg.values()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003610 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003611 'b\uFFFD\uFFFDz',
3612 'Maintenant je vous pr\uFFFD\uFFFDsente mon '
3613 'coll\uFFFD\uFFFDgue, le pouf '
3614 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
R. David Murray96fd54e2010-10-08 15:55:28 +00003615 '\tJean de Baddie',
R. David Murray92532142011-01-07 23:25:30 +00003616 "g\uFFFD\uFFFDst"])
R. David Murray96fd54e2010-10-08 15:55:28 +00003617
3618 def test_items_with_8bit_headers(self):
3619 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003620 self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003621 [('From', 'foo@bar.com'),
R. David Murray92532142011-01-07 23:25:30 +00003622 ('To', 'b\uFFFD\uFFFDz'),
3623 ('Subject', 'Maintenant je vous '
3624 'pr\uFFFD\uFFFDsente '
3625 'mon coll\uFFFD\uFFFDgue, le pouf '
3626 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
3627 '\tJean de Baddie'),
3628 ('From', 'g\uFFFD\uFFFDst')])
R. David Murray96fd54e2010-10-08 15:55:28 +00003629
3630 def test_get_all_with_8bit_headers(self):
3631 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003632 self.assertListEqual([str(x) for x in msg.get_all('from')],
R. David Murray96fd54e2010-10-08 15:55:28 +00003633 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003634 'g\uFFFD\uFFFDst'])
R. David Murray96fd54e2010-10-08 15:55:28 +00003635
R David Murraya2150232011-03-16 21:11:23 -04003636 def test_get_content_type_with_8bit(self):
3637 msg = email.message_from_bytes(textwrap.dedent("""\
3638 Content-Type: text/pl\xA7in; charset=utf-8
3639 """).encode('latin-1'))
3640 self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin")
3641 self.assertEqual(msg.get_content_maintype(), "text")
3642 self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin")
3643
R David Murray97f43c02012-06-24 05:03:27 -04003644 # test_headerregistry.TestContentTypeHeader.non_ascii_in_params
R David Murraya2150232011-03-16 21:11:23 -04003645 def test_get_params_with_8bit(self):
3646 msg = email.message_from_bytes(
3647 'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1'))
3648 self.assertEqual(msg.get_params(header='x-header'),
3649 [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])
3650 self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne')
3651 # XXX: someday you might be able to get 'b\xa7r', for now you can't.
3652 self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None)
3653
R David Murray97f43c02012-06-24 05:03:27 -04003654 # test_headerregistry.TestContentTypeHeader.non_ascii_in_rfc2231_value
R David Murraya2150232011-03-16 21:11:23 -04003655 def test_get_rfc2231_params_with_8bit(self):
3656 msg = email.message_from_bytes(textwrap.dedent("""\
3657 Content-Type: text/plain; charset=us-ascii;
3658 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3659 ).encode('latin-1'))
3660 self.assertEqual(msg.get_param('title'),
3661 ('us-ascii', 'en', 'This is not f\uFFFDn'))
3662
3663 def test_set_rfc2231_params_with_8bit(self):
3664 msg = email.message_from_bytes(textwrap.dedent("""\
3665 Content-Type: text/plain; charset=us-ascii;
3666 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3667 ).encode('latin-1'))
3668 msg.set_param('title', 'test')
3669 self.assertEqual(msg.get_param('title'), 'test')
3670
3671 def test_del_rfc2231_params_with_8bit(self):
3672 msg = email.message_from_bytes(textwrap.dedent("""\
3673 Content-Type: text/plain; charset=us-ascii;
3674 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3675 ).encode('latin-1'))
3676 msg.del_param('title')
3677 self.assertEqual(msg.get_param('title'), None)
3678 self.assertEqual(msg.get_content_maintype(), 'text')
3679
3680 def test_get_payload_with_8bit_cte_header(self):
3681 msg = email.message_from_bytes(textwrap.dedent("""\
3682 Content-Transfer-Encoding: b\xa7se64
3683 Content-Type: text/plain; charset=latin-1
3684
3685 payload
3686 """).encode('latin-1'))
3687 self.assertEqual(msg.get_payload(), 'payload\n')
3688 self.assertEqual(msg.get_payload(decode=True), b'payload\n')
3689
# A utf-8 encoded message with non-ASCII text in its To: and Subject:
# headers (the Subject is folded onto a tab-indented second line) and a
# Cyrillic body declared as 8bit.
non_latin_bin_msg = '\n'.join([
    'From: foo@bar.com',
    'To: báz',
    'Subject: Maintenant je vous présente mon collègue, le pouf célèbre',
    '\tJean de Baddie',
    'Mime-Version: 1.0',
    'Content-Type: text/plain; charset="utf-8"',
    'Content-Transfer-Encoding: 8bit',
    '',
    'Да, они летят.',
    '',
]).encode('utf-8')
3701
3702 def test_bytes_generator(self):
3703 msg = email.message_from_bytes(self.non_latin_bin_msg)
3704 out = BytesIO()
3705 email.generator.BytesGenerator(out).flatten(msg)
3706 self.assertEqual(out.getvalue(), self.non_latin_bin_msg)
3707
R. David Murray7372a072011-01-26 21:21:32 +00003708 def test_bytes_generator_handles_None_body(self):
3709 #Issue 11019
3710 msg = email.message.Message()
3711 out = BytesIO()
3712 email.generator.BytesGenerator(out).flatten(msg)
3713 self.assertEqual(out.getvalue(), b"\n")
3714
# Expected 7bit text for the non-latin message, with the Subject folded
# over three lines and the body base64 encoded.  The two continuation
# lines of the Subject start with a single space; each line is written as
# its own string so that leading whitespace cannot be lost to source
# re-indentation.
non_latin_bin_msg_as7bit_wrapped = '\n'.join([
    'From: foo@bar.com',
    'To: =?unknown-8bit?q?b=C3=A1z?=',
    'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=',
    ' =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=',
    ' =?unknown-8bit?q?_Jean_de_Baddie?=',
    'Mime-Version: 1.0',
    'Content-Type: text/plain; charset="utf-8"',
    'Content-Transfer-Encoding: base64',
    '',
    '0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==',
    '',
])
3727
3728 def test_generator_handles_8bit(self):
3729 msg = email.message_from_bytes(self.non_latin_bin_msg)
3730 out = StringIO()
3731 email.generator.Generator(out).flatten(msg)
R. David Murray92532142011-01-07 23:25:30 +00003732 self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped)
R. David Murray96fd54e2010-10-08 15:55:28 +00003733
3734 def test_bytes_generator_with_unix_from(self):
3735 # The unixfrom contains a current date, so we can't check it
3736 # literally. Just make sure the first word is 'From' and the
3737 # rest of the message matches the input.
3738 msg = email.message_from_bytes(self.non_latin_bin_msg)
3739 out = BytesIO()
3740 email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
3741 lines = out.getvalue().split(b'\n')
3742 self.assertEqual(lines[0].split()[0], b'From')
3743 self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)
3744
# Same as the wrapped rendering, except that the first two lines of the
# folded Subject (list items 2 and 3) are merged into one long line.
non_latin_bin_msg_as7bit = '\n'.join(
    non_latin_bin_msg_as7bit_wrapped.split('\n')[:2]
    + ['Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
       'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
    + non_latin_bin_msg_as7bit_wrapped.split('\n')[4:]
)
3750
def test_message_from_binary_file(self):
    # Write the raw bytes to a file, parse that file in binary mode and
    # compare the str() of the result with the 7bit rendering.
    path = 'test.msg'
    # Registered before the file is created, so it is removed whatever
    # happens below.
    self.addCleanup(unlink, path)
    with open(path, 'wb') as sink:
        sink.write(self.non_latin_bin_msg)
    with open(path, 'rb') as source:
        parsed = email.parser.BytesParser().parse(source)
    self.assertEqual(str(parsed), self.non_latin_bin_msg_as7bit)
3759
# A latin-1 encoded message whose body is declared as 8bit.
latin_bin_msg = '\n'.join([
    'From: foo@bar.com',
    'To: Dinsdale',
    'Subject: Nudge nudge, wink, wink',
    'Mime-Version: 1.0',
    'Content-Type: text/plain; charset="latin-1"',
    'Content-Transfer-Encoding: 8bit',
    '',
    'oh là là, know what I mean, know what I mean?',
    '',
]).encode('latin-1')

# The text the tests expect from str() of the parsed latin-1 message:
# iso-8859-1 charset label and a quoted-printable body.
latin_bin_msg_as7bit = '\n'.join([
    'From: foo@bar.com',
    'To: Dinsdale',
    'Subject: Nudge nudge, wink, wink',
    'Mime-Version: 1.0',
    'Content-Type: text/plain; charset="iso-8859-1"',
    'Content-Transfer-Encoding: quoted-printable',
    '',
    'oh l=E0 l=E0, know what I mean, know what I mean?',
    '',
])
3781
3782 def test_string_generator_reencodes_to_quopri_when_appropriate(self):
3783 m = email.message_from_bytes(self.latin_bin_msg)
3784 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3785
3786 def test_decoded_generator_emits_unicode_body(self):
3787 m = email.message_from_bytes(self.latin_bin_msg)
3788 out = StringIO()
3789 email.generator.DecodedGenerator(out).flatten(m)
3790 #DecodedHeader output contains an extra blank line compared
3791 #to the input message. RDM: not sure if this is a bug or not,
3792 #but it is not specific to the 8bit->7bit conversion.
3793 self.assertEqual(out.getvalue(),
3794 self.latin_bin_msg.decode('latin-1')+'\n')
3795
3796 def test_bytes_feedparser(self):
3797 bfp = email.feedparser.BytesFeedParser()
3798 for i in range(0, len(self.latin_bin_msg), 10):
3799 bfp.feed(self.latin_bin_msg[i:i+10])
3800 m = bfp.close()
3801 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3802
def test_crlf_flatten(self):
    # Flattening with linesep='\r\n' must reproduce msg_26.txt
    # byte-for-byte.
    with openfile('msg_26.txt', 'rb') as fp:
        original = fp.read()
    parsed = email.message_from_bytes(original)
    buffer = BytesIO()
    email.generator.BytesGenerator(buffer).flatten(parsed, linesep='\r\n')
    self.assertEqual(buffer.getvalue(), original)
R David Murrayc5c14722011-04-06 08:13:02 -04003811
def test_8bit_multipart(self):
    # Issue 11605
    # A multipart message whose parts are 8bit utf-8 must survive a
    # parse/flatten round trip through BytesGenerator unchanged.
    source = textwrap.dedent("""\
        Date: Fri, 18 Mar 2011 17:15:43 +0100
        To: foo@example.com
        From: foodwatch-Newsletter <bar@example.com>
        Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System
        Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain>
        MIME-Version: 1.0
        Content-Type: multipart/alternative;
                boundary="b1_76a486bee62b0d200f33dc2ca08220ad"

        --b1_76a486bee62b0d200f33dc2ca08220ad
        Content-Type: text/plain; charset="utf-8"
        Content-Transfer-Encoding: 8bit

        Guten Tag, ,

        mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die
        Nachrichten aus Japan.


        --b1_76a486bee62b0d200f33dc2ca08220ad
        Content-Type: text/html; charset="utf-8"
        Content-Transfer-Encoding: 8bit

        <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
            "http://www.w3.org/TR/html4/loose.dtd">
        <html lang="de">
        <head>
        <title>foodwatch - Newsletter</title>
        </head>
        <body>
        <p>mit gro&szlig;er Betroffenheit verfolgen auch wir im foodwatch-Team
            die Nachrichten aus Japan.</p>
        </body>
        </html>
        --b1_76a486bee62b0d200f33dc2ca08220ad--

        """).encode('utf-8')
    msg = email.message_from_bytes(source)
    s = BytesIO()
    g = email.generator.BytesGenerator(s)
    g.flatten(msg)
    # The flattened bytes must equal the input exactly.
    self.assertEqual(s.getvalue(), source)
3857
R David Murray9fd170e2012-03-14 14:05:03 -04003858 def test_bytes_generator_b_encoding_linesep(self):
3859 # Issue 14062: b encoding was tacking on an extra \n.
3860 m = Message()
3861 # This has enough non-ascii that it should always end up b encoded.
3862 m['Subject'] = Header('žluťoučký kůň')
3863 s = BytesIO()
3864 g = email.generator.BytesGenerator(s)
3865 g.flatten(m, linesep='\r\n')
3866 self.assertEqual(
3867 s.getvalue(),
3868 b'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
3869
3870 def test_generator_b_encoding_linesep(self):
3871 # Since this broke in ByteGenerator, test Generator for completeness.
3872 m = Message()
3873 # This has enough non-ascii that it should always end up b encoded.
3874 m['Subject'] = Header('žluťoučký kůň')
3875 s = StringIO()
3876 g = email.generator.Generator(s)
3877 g.flatten(m, linesep='\r\n')
3878 self.assertEqual(
3879 s.getvalue(),
3880 'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
3881
# None lifts unittest's length limit on assertion-failure diffs.
maxDiff = None
3883
Ezio Melottib3aedd42010-11-20 19:04:17 +00003884
class BaseTestBytesGeneratorIdempotent:
    # Mixin providing bytes-based _msgobj/_idempotent helpers.  It reads
    # linesep, blinesep and normalize_linesep_regex from self, so concrete
    # subclasses have to define those three attributes.

    maxDiff = None

    def _msgobj(self, filename):
        # Load the fixture, rewrite its line endings to self.blinesep, and
        # return both the parsed message and the normalized bytes.
        with openfile(filename, 'rb') as fp:
            raw = fp.read()
        normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(normalized), normalized

    def _idempotent(self, msg, data, unixfrom=False):
        # Flatten msg without header re-wrapping and require the output to
        # be identical to data.
        buffer = BytesIO()
        generator = email.generator.BytesGenerator(buffer, maxheaderlen=0)
        generator.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertEqual(data, buffer.getvalue())
R. David Murray96fd54e2010-10-08 15:55:28 +00003901
3902
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                     TestIdempotent):
    # LF flavour of the idempotency tests.
    # Line separator passed to BytesGenerator.flatten().
    linesep = '\n'
    # Replacement used when normalizing the fixture bytes.
    blinesep = b'\n'
    # Matches every CRLF pair so it can be rewritten to a bare LF.
    normalize_linesep_regex = re.compile(br'\r\n')
3908
3909
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    # CRLF flavour of the idempotency tests.
    # Line separator passed to BytesGenerator.flatten().
    linesep = '\r\n'
    # Replacement used when normalizing the fixture bytes.
    blinesep = b'\r\n'
    # Matches only LFs that are not already preceded by CR, so existing
    # CRLF pairs are left alone.
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
3915
Ezio Melottib3aedd42010-11-20 19:04:17 +00003916
class TestBase64(unittest.TestCase):
    # Tests for email.base64mime: length calculation, decoding, body
    # encoding (line wrapping and eol handling) and header encoding.

    def test_len(self):
        check = self.assertEqual
        encoded = base64mime.body_encode(b'hello', eol='')
        check(base64mime.header_length('hello'), len(encoded))
        # Each started group of three input characters costs four output
        # characters: 0 -> 0, 1..3 -> 4, 4..6 -> 8, ... 13..14 -> 20.
        for size in range(15):
            groups = (size + 2) // 3
            check(base64mime.header_length('x' * size), 4 * groups)

    def test_decode(self):
        check = self.assertEqual
        check(base64mime.decode(''), b'')
        check(base64mime.decode('aGVsbG8='), b'hello')

    def test_encode(self):
        check = self.assertEqual
        check(base64mime.body_encode(b''), b'')
        check(base64mime.body_encode(b'hello'), 'aGVsbG8=\n')
        # Test the binary flag
        check(base64mime.body_encode(b'hello\n'), 'aGVsbG8K\n')
        payload = b'xxxx ' * 20
        wrapped = ['eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg'] * 3
        wrapped.append('eHh4eCB4eHh4IA==')
        # Test the maxlinelen arg
        check(base64mime.body_encode(payload, maxlinelen=40),
              ''.join(line + '\n' for line in wrapped))
        # Test the eol argument
        check(base64mime.body_encode(payload, maxlinelen=40, eol='\r\n'),
              ''.join(line + '\r\n' for line in wrapped))

    def test_header_encode(self):
        check = self.assertEqual
        encode = base64mime.header_encode
        check(encode('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        check(encode('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        check(encode('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # Test the charset option
        check(encode('hello', charset='iso-8859-2'),
              '=?iso-8859-2?b?aGVsbG8=?=')
        check(encode('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003967
3968
Ezio Melottib3aedd42010-11-20 19:04:17 +00003969
class TestQuopri(unittest.TestCase):
    # Tests for email.quoprimime: the header/body "needs quoting"
    # predicates, the length calculators, quote/unquote, and the header and
    # body encoders/decoders.

    def setUp(self):
        # Set of characters (as byte integers) that don't need to be encoded
        # in headers.
        self.hlit = list(chain(
            range(ord('a'), ord('z') + 1),
            range(ord('A'), ord('Z') + 1),
            range(ord('0'), ord('9') + 1),
            (c for c in b'!*+-/')))
        # Set of characters (as byte integers) that do need to be encoded in
        # headers.
        self.hnon = [c for c in range(256) if c not in self.hlit]
        # assertEqual instead of a bare assert: the sanity check must not
        # vanish when the suite runs under python -O.
        self.assertEqual(len(self.hlit) + len(self.hnon), 256)
        # Set of characters (as byte integers) that don't need to be encoded
        # in bodies.
        self.blit = list(range(ord(' '), ord('~') + 1))
        self.blit.append(ord('\t'))
        self.blit.remove(ord('='))
        # Set of characters (as byte integers) that do need to be encoded in
        # bodies.
        self.bnon = [c for c in range(256) if c not in self.blit]
        self.assertEqual(len(self.blit) + len(self.bnon), 256)

    def test_quopri_header_check(self):
        for c in self.hlit:
            self.assertFalse(quoprimime.header_check(c),
                        'Should not be header quopri encoded: %s' % chr(c))
        for c in self.hnon:
            self.assertTrue(quoprimime.header_check(c),
                            'Should be header quopri encoded: %s' % chr(c))

    def test_quopri_body_check(self):
        for c in self.blit:
            self.assertFalse(quoprimime.body_check(c),
                        'Should not be body quopri encoded: %s' % chr(c))
        for c in self.bnon:
            self.assertTrue(quoprimime.body_check(c),
                            'Should be body quopri encoded: %s' % chr(c))

    def test_header_quopri_len(self):
        eq = self.assertEqual
        eq(quoprimime.header_length(b'hello'), 5)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
           quoprimime.header_length(b'hello') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
           quoprimime.header_length(b'h@e@l@l@o@') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        for c in self.hlit:
            eq(quoprimime.header_length(bytes([c])), 1,
               'expected length 1 for %r' % chr(c))
        for c in self.hnon:
            # Space is special; it's encoded to _
            if c == ord(' '):
                continue
            eq(quoprimime.header_length(bytes([c])), 3,
               'expected length 3 for %r' % chr(c))
        eq(quoprimime.header_length(b' '), 1)

    def test_body_quopri_len(self):
        eq = self.assertEqual
        for c in self.blit:
            eq(quoprimime.body_length(bytes([c])), 1)
        for c in self.bnon:
            eq(quoprimime.body_length(bytes([c])), 3)

    def test_quote_unquote_idempotent(self):
        for x in range(256):
            c = chr(x)
            self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)

    def _test_header_encode(self, header, expected_encoded_header, charset=None):
        # Helper: encode header (with header_encode's default charset when
        # charset is None) and compare with the expected encoded word.
        if charset is None:
            encoded_header = quoprimime.header_encode(header)
        else:
            encoded_header = quoprimime.header_encode(header, charset)
        self.assertEqual(encoded_header, expected_encoded_header)

    def test_header_encode_null(self):
        self._test_header_encode(b'', '')

    def test_header_encode_one_word(self):
        self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=')

    def test_header_encode_two_lines(self):
        self._test_header_encode(b'hello\nworld',
                                '=?iso-8859-1?q?hello=0Aworld?=')

    def test_header_encode_non_ascii(self):
        self._test_header_encode(b'hello\xc7there',
                                '=?iso-8859-1?q?hello=C7there?=')

    def test_header_encode_alt_charset(self):
        self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
                charset='iso-8859-2')

    def _test_header_decode(self, encoded_header, expected_decoded_header):
        # Helper: decode and compare.
        decoded_header = quoprimime.header_decode(encoded_header)
        self.assertEqual(decoded_header, expected_decoded_header)

    def test_header_decode_null(self):
        self._test_header_decode('', '')

    def test_header_decode_one_word(self):
        self._test_header_decode('hello', 'hello')

    def test_header_decode_two_lines(self):
        self._test_header_decode('hello=0Aworld', 'hello\nworld')

    def test_header_decode_non_ascii(self):
        self._test_header_decode('hello=C7there', 'hello\xc7there')

    def test_header_decode_re_bug_18380(self):
        # Issue 18380: Call re.sub with a positional argument for flags in the wrong position
        self.assertEqual(quoprimime.header_decode('=30' * 257), '0' * 257)

    def _test_decode(self, encoded, expected_decoded, eol=None):
        # Helper: decode with quoprimime.decode's default eol unless one is
        # given, then compare.
        if eol is None:
            decoded = quoprimime.decode(encoded)
        else:
            decoded = quoprimime.decode(encoded, eol=eol)
        self.assertEqual(decoded, expected_decoded)

    def test_decode_null_word(self):
        self._test_decode('', '')

    def test_decode_null_line_null_word(self):
        self._test_decode('\r\n', '\n')

    def test_decode_one_word(self):
        self._test_decode('hello', 'hello')

    def test_decode_one_word_eol(self):
        self._test_decode('hello', 'hello', eol='X')

    def test_decode_one_line(self):
        self._test_decode('hello\r\n', 'hello\n')

    def test_decode_one_line_lf(self):
        self._test_decode('hello\n', 'hello\n')

    def test_decode_one_line_cr(self):
        self._test_decode('hello\r', 'hello\n')

    def test_decode_one_line_nl(self):
        self._test_decode('hello\n', 'helloX', eol='X')

    def test_decode_one_line_crnl(self):
        self._test_decode('hello\r\n', 'helloX', eol='X')

    def test_decode_one_line_one_word(self):
        self._test_decode('hello\r\nworld', 'hello\nworld')

    def test_decode_one_line_one_word_eol(self):
        self._test_decode('hello\r\nworld', 'helloXworld', eol='X')

    def test_decode_two_lines(self):
        self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n')

    def test_decode_two_lines_eol(self):
        self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X')

    def test_decode_one_long_line(self):
        self._test_decode('Spam' * 250, 'Spam' * 250)

    def test_decode_one_space(self):
        self._test_decode(' ', '')

    def test_decode_multiple_spaces(self):
        self._test_decode(' ' * 5, '')

    def test_decode_one_line_trailing_spaces(self):
        self._test_decode('hello \r\n', 'hello\n')

    def test_decode_two_lines_trailing_spaces(self):
        self._test_decode('hello \r\nworld \r\n', 'hello\nworld\n')

    def test_decode_quoted_word(self):
        self._test_decode('=22quoted=20words=22', '"quoted words"')

    def test_decode_uppercase_quoting(self):
        self._test_decode('ab=CD=EF', 'ab\xcd\xef')

    def test_decode_lowercase_quoting(self):
        self._test_decode('ab=cd=ef', 'ab\xcd\xef')

    def test_decode_soft_line_break(self):
        self._test_decode('soft line=\r\nbreak', 'soft linebreak')

    def test_decode_false_quoting(self):
        self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2')

    def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
        # Helper: call body_encode, passing maxlinelen/eol only when given so
        # that the function's own defaults are exercised otherwise, compare
        # the result, and check the line-length limit where lines can be
        # split reliably.
        kwargs = {}
        if maxlinelen is None:
            # Use body_encode's default.
            maxlinelen = 76
        else:
            kwargs['maxlinelen'] = maxlinelen
        if eol is None:
            # Use body_encode's default.
            eol = '\n'
        else:
            kwargs['eol'] = eol
        encoded_body = quoprimime.body_encode(body, **kwargs)
        self.assertEqual(encoded_body, expected_encoded_body)
        if eol == '\n' or eol == '\r\n':
            # We know how to split the result back into lines, so maxlinelen
            # can be checked.
            for line in encoded_body.splitlines():
                self.assertLessEqual(len(line), maxlinelen)

    def test_encode_null(self):
        self._test_encode('', '')

    def test_encode_null_lines(self):
        self._test_encode('\n\n', '\n\n')

    def test_encode_one_line(self):
        self._test_encode('hello\n', 'hello\n')

    def test_encode_one_line_crlf(self):
        self._test_encode('hello\r\n', 'hello\n')

    def test_encode_one_line_eol(self):
        self._test_encode('hello\n', 'hello\r\n', eol='\r\n')

    def test_encode_one_space(self):
        self._test_encode(' ', '=20')

    def test_encode_one_line_one_space(self):
        self._test_encode(' \n', '=20\n')

    # XXX: body_encode() expects strings, but uses ord(char) from these
    # strings to index into a 256-entry list.  For code points above 255,
    # this will fail.  Should there be a check for 8-bit only ord() values
    # in body, or at least a comment about the expected input?

    def test_encode_two_lines_one_space(self):
        self._test_encode(' \n \n', '=20\n=20\n')

    def test_encode_one_word_trailing_spaces(self):
        # Only the LAST whitespace character of a line is quoted, so two
        # trailing spaces are needed to get a literal space before '=20'.
        # The space count is spelled out so it cannot be lost silently.
        self._test_encode('hello' + ' ' * 2, 'hello' + ' ' + '=20')

    def test_encode_one_line_trailing_spaces(self):
        # Same as above, with a line terminator after the spaces.
        self._test_encode('hello' + ' ' * 2 + '\n', 'hello' + ' ' + '=20\n')

    def test_encode_one_word_trailing_tab(self):
        self._test_encode('hello \t', 'hello =09')

    def test_encode_one_line_trailing_tab(self):
        self._test_encode('hello \t\n', 'hello =09\n')

    def test_encode_trailing_space_before_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6)

    def test_encode_trailing_space_at_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5)

    def test_encode_trailing_space_beyond_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4)

    def test_encode_whitespace_lines(self):
        self._test_encode(' \n' * 5, '=20\n' * 5)

    def test_encode_quoted_equals(self):
        self._test_encode('a = b', 'a =3D b')

    def test_encode_one_long_string(self):
        self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25)

    def test_encode_one_long_line(self):
        self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')

    def test_encode_one_very_long_line(self):
        self._test_encode('x' * 200 + '\n',
                2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n')

    def test_encode_shortest_maxlinelen(self):
        self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4)

    def test_encode_maxlinelen_too_small(self):
        self.assertRaises(ValueError, self._test_encode, '', '', maxlinelen=3)

    def test_encode(self):
        eq = self.assertEqual
        eq(quoprimime.body_encode(''), '')
        eq(quoprimime.body_encode('hello'), 'hello')
        # CRLF in the input comes out as the (default) eol.
        eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
        # Test the maxlinelen arg
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
x xxxx xxxx xxxx xxxx=20""")
        # Test the eol argument
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
x xxxx xxxx xxxx xxxx=20""")
        eq(quoprimime.body_encode("""\
one line

two line"""), """\
one line

two line""")
4283
4284
Ezio Melottib3aedd42010-11-20 19:04:17 +00004285
# Test the Charset class
class TestCharset(unittest.TestCase):
    def tearDown(self):
        # Unregister the 'fake' charset if test_body_encode added it.
        from email import charset as CharsetModule
        CharsetModule.CHARSETS.pop('fake', None)

    def test_codec_encodeable(self):
        # Make sure us-ascii = no Unicode conversion
        ascii_cs = Charset('us-ascii')
        self.assertEqual(ascii_cs.header_encode('Hello World!'),
                         'Hello World!')
        # Test 8-bit idempotency with us-ascii
        non_ascii = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
        with self.assertRaises(UnicodeError):
            ascii_cs.header_encode(non_ascii)
        utf8_cs = Charset('utf-8')
        self.assertEqual(utf8_cs.header_encode(non_ascii),
                         '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')

    def test_body_encode(self):
        check = self.assertEqual
        # Try a charset with QP body encoding
        check('hello w=F6rld',
              Charset('iso-8859-1').body_encode('hello w\xf6rld'))
        # Try a charset with Base64 body encoding
        check('aGVsbG8gd29ybGQ=\n',
              Charset('utf-8').body_encode(b'hello world'))
        # Try a charset with None body encoding
        check('hello world', Charset('us-ascii').body_encode('hello world'))
        # Try the convert argument, where input codec != output codec
        c = Charset('euc-jp')
        # With apologies to Tokio Kikuchi ;)
        # XXX FIXME
##         try:
##             eq('\x1b$B5FCO;~IW\x1b(B',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
##             eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
##         except LookupError:
##             # We probably don't have the Japanese codecs installed
##             pass
        # Testing SF bug #625509, which we have to fake, since there are no
        # built-in encodings where the header encoding is QP but the body
        # encoding is not.
        from email import charset as CharsetModule
        CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8')
        check('hello world', Charset('fake').body_encode('hello world'))

    def test_unicode_charset_name(self):
        ascii_cs = Charset('us-ascii')
        self.assertEqual(str(ascii_cs), 'us-ascii')
        with self.assertRaises(errors.CharsetError):
            Charset('asc\xffii')
4341
4342
Ezio Melottib3aedd42010-11-20 19:04:17 +00004343
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004344# Test multilingual MIME headers.
4345class TestHeader(TestEmailBase):
4346 def test_simple(self):
4347 eq = self.ndiffAssertEqual
4348 h = Header('Hello World!')
4349 eq(h.encode(), 'Hello World!')
4350 h.append(' Goodbye World!')
4351 eq(h.encode(), 'Hello World! Goodbye World!')
4352
4353 def test_simple_surprise(self):
4354 eq = self.ndiffAssertEqual
4355 h = Header('Hello World!')
4356 eq(h.encode(), 'Hello World!')
4357 h.append('Goodbye World!')
4358 eq(h.encode(), 'Hello World! Goodbye World!')
4359
4360 def test_header_needs_no_decoding(self):
4361 h = 'no decoding needed'
4362 self.assertEqual(decode_header(h), [(h, None)])
4363
4364 def test_long(self):
4365 h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
4366 maxlinelen=76)
4367 for l in h.encode(splitchars=' ').split('\n '):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00004368 self.assertTrue(len(l) <= 76)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004369
4370 def test_multilingual(self):
4371 eq = self.ndiffAssertEqual
4372 g = Charset("iso-8859-1")
4373 cz = Charset("iso-8859-2")
4374 utf8 = Charset("utf-8")
4375 g_head = (b'Die Mieter treten hier ein werden mit einem '
4376 b'Foerderband komfortabel den Korridor entlang, '
4377 b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
4378 b'gegen die rotierenden Klingen bef\xf6rdert. ')
4379 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
4380 b'd\xf9vtipu.. ')
4381 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
4382 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
4383 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
4384 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
4385 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
4386 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
4387 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
4388 '\u3044\u307e\u3059\u3002')
4389 h = Header(g_head, g)
4390 h.append(cz_head, cz)
4391 h.append(utf8_head, utf8)
Guido van Rossum9604e662007-08-30 03:46:43 +00004392 enc = h.encode(maxlinelen=76)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004393 eq(enc, """\
Guido van Rossum9604e662007-08-30 03:46:43 +00004394=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
4395 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
4396 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
4397 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004398 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
4399 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
4400 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
4401 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
Guido van Rossum9604e662007-08-30 03:46:43 +00004402 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
4403 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
4404 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
4405 decoded = decode_header(enc)
4406 eq(len(decoded), 3)
4407 eq(decoded[0], (g_head, 'iso-8859-1'))
4408 eq(decoded[1], (cz_head, 'iso-8859-2'))
4409 eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004410 ustr = str(h)
Guido van Rossum9604e662007-08-30 03:46:43 +00004411 eq(ustr,
4412 (b'Die Mieter treten hier ein werden mit einem Foerderband '
4413 b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
4414 b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
4415 b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
4416 b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
4417 b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
4418 b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
4419 b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
4420 b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
4421 b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
4422 b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
4423 b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
4424 b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
4425 b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
4426 b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
4427 b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
4428 ).decode('utf-8'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004429 # Test make_header()
4430 newh = make_header(decode_header(enc))
Guido van Rossum9604e662007-08-30 03:46:43 +00004431 eq(newh, h)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004432
4433 def test_empty_header_encode(self):
4434 h = Header()
4435 self.assertEqual(h.encode(), '')
Barry Warsaw8b3d6592007-08-30 02:10:49 +00004436
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004437 def test_header_ctor_default_args(self):
4438 eq = self.ndiffAssertEqual
4439 h = Header()
4440 eq(h, '')
4441 h.append('foo', Charset('iso-8859-1'))
Guido van Rossum9604e662007-08-30 03:46:43 +00004442 eq(h, 'foo')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004443
4444 def test_explicit_maxlinelen(self):
4445 eq = self.ndiffAssertEqual
4446 hstr = ('A very long line that must get split to something other '
4447 'than at the 76th character boundary to test the non-default '
4448 'behavior')
4449 h = Header(hstr)
4450 eq(h.encode(), '''\
4451A very long line that must get split to something other than at the 76th
4452 character boundary to test the non-default behavior''')
4453 eq(str(h), hstr)
4454 h = Header(hstr, header_name='Subject')
4455 eq(h.encode(), '''\
4456A very long line that must get split to something other than at the
4457 76th character boundary to test the non-default behavior''')
4458 eq(str(h), hstr)
4459 h = Header(hstr, maxlinelen=1024, header_name='Subject')
4460 eq(h.encode(), hstr)
4461 eq(str(h), hstr)
4462
Guido van Rossum9604e662007-08-30 03:46:43 +00004463 def test_quopri_splittable(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004464 eq = self.ndiffAssertEqual
4465 h = Header(charset='iso-8859-1', maxlinelen=20)
Guido van Rossum9604e662007-08-30 03:46:43 +00004466 x = 'xxxx ' * 20
4467 h.append(x)
4468 s = h.encode()
4469 eq(s, """\
4470=?iso-8859-1?q?xxx?=
4471 =?iso-8859-1?q?x_?=
4472 =?iso-8859-1?q?xx?=
4473 =?iso-8859-1?q?xx?=
4474 =?iso-8859-1?q?_x?=
4475 =?iso-8859-1?q?xx?=
4476 =?iso-8859-1?q?x_?=
4477 =?iso-8859-1?q?xx?=
4478 =?iso-8859-1?q?xx?=
4479 =?iso-8859-1?q?_x?=
4480 =?iso-8859-1?q?xx?=
4481 =?iso-8859-1?q?x_?=
4482 =?iso-8859-1?q?xx?=
4483 =?iso-8859-1?q?xx?=
4484 =?iso-8859-1?q?_x?=
4485 =?iso-8859-1?q?xx?=
4486 =?iso-8859-1?q?x_?=
4487 =?iso-8859-1?q?xx?=
4488 =?iso-8859-1?q?xx?=
4489 =?iso-8859-1?q?_x?=
4490 =?iso-8859-1?q?xx?=
4491 =?iso-8859-1?q?x_?=
4492 =?iso-8859-1?q?xx?=
4493 =?iso-8859-1?q?xx?=
4494 =?iso-8859-1?q?_x?=
4495 =?iso-8859-1?q?xx?=
4496 =?iso-8859-1?q?x_?=
4497 =?iso-8859-1?q?xx?=
4498 =?iso-8859-1?q?xx?=
4499 =?iso-8859-1?q?_x?=
4500 =?iso-8859-1?q?xx?=
4501 =?iso-8859-1?q?x_?=
4502 =?iso-8859-1?q?xx?=
4503 =?iso-8859-1?q?xx?=
4504 =?iso-8859-1?q?_x?=
4505 =?iso-8859-1?q?xx?=
4506 =?iso-8859-1?q?x_?=
4507 =?iso-8859-1?q?xx?=
4508 =?iso-8859-1?q?xx?=
4509 =?iso-8859-1?q?_x?=
4510 =?iso-8859-1?q?xx?=
4511 =?iso-8859-1?q?x_?=
4512 =?iso-8859-1?q?xx?=
4513 =?iso-8859-1?q?xx?=
4514 =?iso-8859-1?q?_x?=
4515 =?iso-8859-1?q?xx?=
4516 =?iso-8859-1?q?x_?=
4517 =?iso-8859-1?q?xx?=
4518 =?iso-8859-1?q?xx?=
4519 =?iso-8859-1?q?_?=""")
4520 eq(x, str(make_header(decode_header(s))))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004521 h = Header(charset='iso-8859-1', maxlinelen=40)
4522 h.append('xxxx ' * 20)
Guido van Rossum9604e662007-08-30 03:46:43 +00004523 s = h.encode()
4524 eq(s, """\
4525=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
4526 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
4527 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
4528 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
4529 =?iso-8859-1?q?_xxxx_xxxx_?=""")
4530 eq(x, str(make_header(decode_header(s))))
4531
4532 def test_base64_splittable(self):
4533 eq = self.ndiffAssertEqual
4534 h = Header(charset='koi8-r', maxlinelen=20)
4535 x = 'xxxx ' * 20
4536 h.append(x)
4537 s = h.encode()
4538 eq(s, """\
4539=?koi8-r?b?eHh4?=
4540 =?koi8-r?b?eCB4?=
4541 =?koi8-r?b?eHh4?=
4542 =?koi8-r?b?IHh4?=
4543 =?koi8-r?b?eHgg?=
4544 =?koi8-r?b?eHh4?=
4545 =?koi8-r?b?eCB4?=
4546 =?koi8-r?b?eHh4?=
4547 =?koi8-r?b?IHh4?=
4548 =?koi8-r?b?eHgg?=
4549 =?koi8-r?b?eHh4?=
4550 =?koi8-r?b?eCB4?=
4551 =?koi8-r?b?eHh4?=
4552 =?koi8-r?b?IHh4?=
4553 =?koi8-r?b?eHgg?=
4554 =?koi8-r?b?eHh4?=
4555 =?koi8-r?b?eCB4?=
4556 =?koi8-r?b?eHh4?=
4557 =?koi8-r?b?IHh4?=
4558 =?koi8-r?b?eHgg?=
4559 =?koi8-r?b?eHh4?=
4560 =?koi8-r?b?eCB4?=
4561 =?koi8-r?b?eHh4?=
4562 =?koi8-r?b?IHh4?=
4563 =?koi8-r?b?eHgg?=
4564 =?koi8-r?b?eHh4?=
4565 =?koi8-r?b?eCB4?=
4566 =?koi8-r?b?eHh4?=
4567 =?koi8-r?b?IHh4?=
4568 =?koi8-r?b?eHgg?=
4569 =?koi8-r?b?eHh4?=
4570 =?koi8-r?b?eCB4?=
4571 =?koi8-r?b?eHh4?=
4572 =?koi8-r?b?IA==?=""")
4573 eq(x, str(make_header(decode_header(s))))
4574 h = Header(charset='koi8-r', maxlinelen=40)
4575 h.append(x)
4576 s = h.encode()
4577 eq(s, """\
4578=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
4579 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
4580 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
4581 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
4582 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
4583 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
4584 eq(x, str(make_header(decode_header(s))))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004585
4586 def test_us_ascii_header(self):
4587 eq = self.assertEqual
4588 s = 'hello'
4589 x = decode_header(s)
4590 eq(x, [('hello', None)])
4591 h = make_header(x)
4592 eq(s, h.encode())
4593
4594 def test_string_charset(self):
4595 eq = self.assertEqual
4596 h = Header()
4597 h.append('hello', 'iso-8859-1')
Guido van Rossum9604e662007-08-30 03:46:43 +00004598 eq(h, 'hello')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004599
4600## def test_unicode_error(self):
4601## raises = self.assertRaises
4602## raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
4603## raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
4604## h = Header()
4605## raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
4606## raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
4607## raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')
4608
4609 def test_utf8_shortest(self):
4610 eq = self.assertEqual
4611 h = Header('p\xf6stal', 'utf-8')
4612 eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
4613 h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
4614 eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')
4615
4616 def test_bad_8bit_header(self):
4617 raises = self.assertRaises
4618 eq = self.assertEqual
4619 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4620 raises(UnicodeError, Header, x)
4621 h = Header()
4622 raises(UnicodeError, h.append, x)
4623 e = x.decode('utf-8', 'replace')
4624 eq(str(Header(x, errors='replace')), e)
4625 h.append(x, errors='replace')
4626 eq(str(h), e)
4627
R David Murray041015c2011-03-25 15:10:55 -04004628 def test_escaped_8bit_header(self):
4629 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
R David Murray6bdb1762011-06-18 12:30:55 -04004630 e = x.decode('ascii', 'surrogateescape')
4631 h = Header(e, charset=email.charset.UNKNOWN8BIT)
R David Murray041015c2011-03-25 15:10:55 -04004632 self.assertEqual(str(h),
4633 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
4634 self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])
4635
R David Murraye5e366c2011-06-18 12:57:28 -04004636 def test_header_handles_binary_unknown8bit(self):
4637 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4638 h = Header(x, charset=email.charset.UNKNOWN8BIT)
4639 self.assertEqual(str(h),
4640 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
4641 self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])
4642
4643 def test_make_header_handles_binary_unknown8bit(self):
4644 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4645 h = Header(x, charset=email.charset.UNKNOWN8BIT)
4646 h2 = email.header.make_header(email.header.decode_header(h))
4647 self.assertEqual(str(h2),
4648 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
4649 self.assertEqual(email.header.decode_header(h2), [(x, 'unknown-8bit')])
4650
R David Murray041015c2011-03-25 15:10:55 -04004651 def test_modify_returned_list_does_not_change_header(self):
4652 h = Header('test')
4653 chunks = email.header.decode_header(h)
4654 chunks.append(('ascii', 'test2'))
4655 self.assertEqual(str(h), 'test')
4656
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004657 def test_encoded_adjacent_nonencoded(self):
4658 eq = self.assertEqual
4659 h = Header()
4660 h.append('hello', 'iso-8859-1')
4661 h.append('world')
4662 s = h.encode()
4663 eq(s, '=?iso-8859-1?q?hello?= world')
4664 h = make_header(decode_header(s))
4665 eq(h.encode(), s)
4666
R David Murray07ea53c2012-06-02 17:56:49 -04004667 def test_whitespace_keeper(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004668 eq = self.assertEqual
4669 s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
4670 parts = decode_header(s)
R David Murray07ea53c2012-06-02 17:56:49 -04004671 eq(parts, [(b'Subject: ', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b' zz.', None)])
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004672 hdr = make_header(parts)
4673 eq(hdr.encode(),
4674 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')
4675
4676 def test_broken_base64_header(self):
4677 raises = self.assertRaises
R. David Murrayc4e69cc2010-08-03 22:14:10 +00004678 s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004679 raises(errors.HeaderParseError, decode_header, s)
4680
R. David Murray477efb32011-01-05 01:39:32 +00004681 def test_shift_jis_charset(self):
4682 h = Header('文', charset='shift_jis')
4683 self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')
4684
R David Murrayde912762011-03-16 18:26:23 -04004685 def test_flatten_header_with_no_value(self):
4686 # Issue 11401 (regression from email 4.x) Note that the space after
4687 # the header doesn't reflect the input, but this is also the way
4688 # email 4.x behaved. At some point it would be nice to fix that.
4689 msg = email.message_from_string("EmptyHeader:")
4690 self.assertEqual(str(msg), "EmptyHeader: \n\n")
4691
R David Murray01581ee2011-04-18 10:04:34 -04004692 def test_encode_preserves_leading_ws_on_value(self):
4693 msg = Message()
4694 msg['SomeHeader'] = ' value with leading ws'
4695 self.assertEqual(str(msg), "SomeHeader: value with leading ws\n\n")
4696
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004697
Ezio Melottib3aedd42010-11-20 19:04:17 +00004698
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004699# Test RFC 2231 header parameters (en/de)coding
class TestRFC2231(TestEmailBase):
    # Exercises RFC 2231 parameter handling through the Message API:
    # get_param()/set_param()/del_param(), get_filename(), get_boundary()
    # and get_content_charset().
    #
    # The "test_headerregistry.TestContentTypeHeader...." comments kept
    # above individual tests appear to name the matching test in
    # test_headerregistry -- NOTE(review): confirm before relying on them.

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes
    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes
    def test_get_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_29.txt')
        # An encoded parameter comes back as (charset, language, value).
        eq(msg.get_param('title'),
           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        eq(msg.get_param('title', unquote=False),
           ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))

    def test_set_param(self):
        eq = self.ndiffAssertEqual
        msg = Message()
        # Without a language, the language slot is the empty string.
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii')
        eq(msg.get_param('title'),
           ('us-ascii', '', 'This is even more ***fun*** isn\'t it!'))
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        eq(msg.get_param('title'),
           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        # Setting the parameter on a parsed message adds it to the existing
        # Content-Type header as a percent-encoded title*= value.
        # NOTE(review): the expected text must match msg_01.txt exactly,
        # including any runs of spaces -- confirm against the data file.
        msg = self._msgobj('msg_01.txt')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    def test_set_param_requote(self):
        msg = Message()
        msg.set_param('title', 'foo')
        self.assertEqual(msg['content-type'], 'text/plain; title="foo"')
        msg.set_param('title', 'bar', requote=False)
        self.assertEqual(msg['content-type'], 'text/plain; title=bar')
        # tspecial is still quoted.
        msg.set_param('title', "(bar)bell", requote=False)
        self.assertEqual(msg['content-type'], 'text/plain; title="(bar)bell"')

    def test_del_param(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        msg.set_param('foo', 'bar', charset='us-ascii', language='en')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        # Only 'foo' is removed; 'title' must still be present afterwards.
        msg.del_param('foo', header='Content-Type')
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_charset
    # I changed the charset name, though, because the one in the file isn't
    # a legal charset name.  Should add a test for an illegal charset.
    def test_rfc2231_get_content_charset(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_32.txt')
        eq(msg.get_content_charset(), 'us-ascii')

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_double_quotes
    def test_rfc2231_parse_rfc_quoting(self):
        m = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*=''This%20is%20even%20more%20;
            \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        # Flattening must give back the input unchanged.
        self.assertEqual(m, msg.as_string())

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes
    def test_rfc2231_parse_extra_quoting(self):
        m = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*="''This%20is%20even%20more%20";
            \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        # Flattening must give back the input unchanged.
        self.assertEqual(m, msg.as_string())

    # test_headerregistry.TestContentTypeHeader.rfc2231_no_language_or_charset
    # but new test uses *0* because otherwise lang/charset is not valid.
    # test_headerregistry.TestContentTypeHeader.rfc2231_segmented_normal_values
    def test_rfc2231_no_language_or_charset(self):
        m = '''\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm

'''
        msg = email.message_from_string(m)
        param = msg.get_param('NAME')
        # Unencoded segments join into a plain string, not a 3-tuple.
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(
            param,
            'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_charset
    def test_rfc2231_no_language_or_charset_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    # Duplicate of previous test?
    def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    # test_headerregistry.TestContentTypeHeader.rfc2231_partly_encoded,
    # but the test below is wrong (the first part should be decoded).
    def test_rfc2231_partly_encoded(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20***fun*** is it not.pdf')

    def test_rfc2231_partly_nonencoded(self):
        # No segment carries the trailing '*', so none is percent-decoded.
        m = '''\
Content-Disposition: inline;
\tfilename*0="This%20is%20even%20more%20";
\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_boundary(self):
        m = '''\
Content-Type: multipart/alternative;
\tboundary*0*="''This%20is%20even%20more%20";
\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tboundary*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_boundary(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_charset(self):
        # This is a nonsensical charset value, but tests the code anyway
        m = '''\
Content-Type: text/plain;
\tcharset*0*="This%20is%20even%20more%20";
\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tcharset*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_content_charset(),
                         'this is even more ***fun*** is it not.pdf')

    # test_headerregistry.TestContentTypeHeader.rfc2231_unknown_charset_treated_as_ascii
    def test_rfc2231_bad_encoding_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_charset(self):
        m = """\
Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D

"""
        msg = email.message_from_string(m)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertIsNone(msg.get_content_charset())

    def test_rfc2231_bad_character_in_charset(self):
        m = """\
Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D

"""
        msg = email.message_from_string(m)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertIsNone(msg.get_content_charset())

    def test_rfc2231_bad_character_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2*="is it not.pdf%E2"

'''
        msg = email.message_from_string(m)
        # The stray %E2 byte shows up as U+FFFD in the filename.
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf\ufffd')

    def test_rfc2231_unknown_encoding(self):
        m = """\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt

"""
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(), 'myfile.txt')

    def test_rfc2231_single_tick_in_filename_extended(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        # A lone apostrophe is not a charset'language' prefix.
        eq(charset, None)
        eq(language, None)
        eq(s, "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes
    def test_rfc2231_single_tick_in_filename(self):
        m = """\
Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(m)
        param = msg.get_param('name')
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(param, "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_value_with_charset_and_lang
    def test_rfc2231_tick_attack_extended(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_non_encoded_value
    def test_rfc2231_tick_attack(self):
        m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(m)
        param = msg.get_param('name')
        # Unencoded segments: the charset'lang' look-alike stays literal.
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(param, "us-ascii'en-us'Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quotes_inside_quotes
    def test_rfc2231_no_extended_values(self):
        m = """\
Content-Type: application/x-foo; name=\"Frank's Document\"

"""
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_param('name'), "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_then_unencoded_segments
    def test_rfc2231_encoded_then_unencoded_segments(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'My\";
\tname*1=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, 'My Document For You')

    # test_headerregistry.TestContentTypeHeader.rfc2231_unencoded_then_encoded_segments
    # test_headerregistry.TestContentTypeHeader.rfc2231_quoted_unencoded_then_encoded_segments
    def test_rfc2231_unencoded_then_encoded_segments(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'My\";
\tname*1*=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, 'My Document For You')
5067
5068
Ezio Melottib3aedd42010-11-20 19:04:17 +00005069
R. David Murraya8f480f2010-01-16 18:30:03 +00005070# Tests to ensure that signed parts of an email are completely preserved, as
5071# required by RFC1847 section 2.1. Note that these are incomplete, because the
5072# email package does not currently always preserve the body. See issue 1670765.
class TestSigned(TestEmailBase):

    # Finds a line starting with '--' (group 1 is the rest of that line)
    # and captures, as group 2, the shortest text up to a later line that
    # is exactly '--' followed by the same group 1 text.  Compiled once
    # here rather than on every comparison.
    _first_part_re = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)

    def _msg_and_obj(self, filename):
        # Return (raw text of the file, Message parsed from that text).
        with openfile(filename) as fp:
            original = fp.read()
        msg = email.message_from_string(original)
        return original, msg

    def _first_part(self, text):
        # Return the first MIME part of text, failing the test (rather
        # than raising AttributeError) when no part can be found.
        match = self._first_part_re.search(text)
        self.assertIsNotNone(match, 'no MIME part found in message text')
        return match.group(2)

    def _signed_parts_eq(self, original, result):
        # Extract the first mime part of each message
        inpart = self._first_part(original)
        outpart = self._first_part(result)
        self.assertEqual(outpart, inpart)

    def test_long_headers_as_string(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)
5105
5106
Ezio Melottib3aedd42010-11-20 19:04:17 +00005107
# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()