blob: 56be794a0a88b83c2e45a7a9530eed1a60ee96dd [file] [log] [blame]
# Copyright (C) 2001-2010 Python Software Foundation
# Contact: email-sig@python.org
# email package unit tests
4
R. David Murray719a4492010-11-21 16:53:48 +00005import re
Guido van Rossum8b3febe2007-08-30 01:15:14 +00006import time
7import base64
Guido van Rossum8b3febe2007-08-30 01:15:14 +00008import unittest
R. David Murray96fd54e2010-10-08 15:55:28 +00009import textwrap
Guido van Rossum8b3febe2007-08-30 01:15:14 +000010
R. David Murray96fd54e2010-10-08 15:55:28 +000011from io import StringIO, BytesIO
Guido van Rossum8b3febe2007-08-30 01:15:14 +000012from itertools import chain
13
14import email
R David Murrayc27e5222012-05-25 15:01:48 -040015import email.policy
Guido van Rossum8b3febe2007-08-30 01:15:14 +000016
17from email.charset import Charset
18from email.header import Header, decode_header, make_header
19from email.parser import Parser, HeaderParser
R David Murray638d40b2012-08-24 11:14:13 -040020from email.generator import Generator, DecodedGenerator, BytesGenerator
Guido van Rossum8b3febe2007-08-30 01:15:14 +000021from email.message import Message
22from email.mime.application import MIMEApplication
23from email.mime.audio import MIMEAudio
24from email.mime.text import MIMEText
25from email.mime.image import MIMEImage
26from email.mime.base import MIMEBase
27from email.mime.message import MIMEMessage
28from email.mime.multipart import MIMEMultipart
29from email import utils
30from email import errors
31from email import encoders
32from email import iterators
33from email import base64mime
34from email import quoprimime
35
R David Murray965794e2013-03-07 18:16:47 -050036from test.support import unlink
R David Murraya256bac2011-03-31 12:20:23 -040037from test.test_email import openfile, TestEmailBase
Guido van Rossum8b3febe2007-08-30 01:15:14 +000038
R David Murray612528d2013-03-15 20:38:15 -040039# These imports are documented to work, but we are testing them using a
40# different path, so we import them here just to make sure they are importable.
41from email.parser import FeedParser, BytesFeedParser
42
# Small string constants shared by the tests in this module.
NL = '\n'
EMPTYSTRING = ''
SPACE = ' '
46
47
# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    # Fixture messages are loaded by name through self._msgobj() (provided
    # by the base class) or read directly with openfile().

    def test_get_all(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        # A missing header yields the supplied fallback object.
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        eq(msg.get_charset(), None)
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        eq(msg.get_charset(), None)
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_get_charsets(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_make_boundary(self):
        msg = MIMEMultipart('form-data')
        # Note that when the boundary gets created is an implementation
        # detail and might change.
        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
        # Trigger creation of boundary
        msg.as_string()
        self.assertEqual(msg.items()[0][1][:33],
                         'multipart/form-data; boundary="==')
        # XXX: there ought to be tests of the uniqueness of the boundary, too.

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        with openfile('msg_46.txt') as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, True, 0)
        gen.flatten(msg, False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_byte_message_rfc822_only(self):
        # Make sure new bytes header parser also passes this.
        with openfile('msg_46.txt', 'rb') as fp:
            msgdata = fp.read()
        parser = email.parser.BytesHeaderParser()
        msg = parser.parsebytes(msgdata)
        out = BytesIO()
        gen = email.generator.BytesGenerator(out)
        gen.flatten(msg)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        eq(msg.get_payload(decode=True), None)
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            # Assigning a header appends a new field instead of replacing
            # the existing one, so a lookup is not guaranteed to see the
            # value just added.  Drop the previous field first so that each
            # alias is really the one being exercised.
            del msg['content-transfer-encoding']
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_get_payload_n_raises_on_non_multipart(self):
        msg = Message()
        self.assertRaises(TypeError, msg.get_payload, 1)

    def test_decoded_generator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        self.assertIn('from', msg)
        self.assertIn('From', msg)
        self.assertIn('FROM', msg)
        self.assertIn('to', msg)
        self.assertIn('To', msg)
        self.assertIn('TO', msg)

    def test_as_string(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        eq(text, str(msg))
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        self.assertTrue(lines[0].startswith('From '))
        # Everything after the envelope line must match the source text.
        eq(text, NL.join(lines[1:]))

    # test_headerregistry.TestContentTypeHeader.bad_params
    def test_bad_param(self):
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_filename(), None)

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_boundary(), None)

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        eq(msg.get_params(), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    # test_headerregistry.TestContentTypeHeader.spaces_around_param_equals
    def test_get_param_liberal(self):
        msg = Message()
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        eq(msg.get_param('quuz', header='x-header'), None)
        eq(msg.get_param('quuz'), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    # test_headerregistry.TestContentTypeHeader.spaces_around_semis
    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    # test_headerregistry.TestContentTypeHeader.semis_inside_quotes
    def test_get_param_with_semis_in_quotes(self):
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    # test_headerregistry.TestContentTypeHeader.quotes_inside_rfc2231_value
    def test_get_param_with_quotes(self):
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        msg = email.message_from_string('Header: exists')
        # Header-name membership ignores case.
        self.assertIn('header', msg)
        self.assertIn('Header', msg)
        self.assertIn('HEADER', msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        # Re-adding the parameter places it after the remaining ones.
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_del_param_on_nonexistent_header(self):
        msg = Message()
        # Deleting param on empty msg should not raise exception.
        msg.del_param('filename', 'content-disposition')

    def test_del_nonexistent_param(self):
        msg = Message()
        msg.add_header('Content-Type', 'text/plain', charset='utf-8')
        existing_header = msg['Content-Type']
        msg.del_param('foobar', header='Content-Type')
        self.assertEqual(msg['Content-Type'], existing_header)

    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        # With a duplicated name, only the first occurrence is replaced.
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    # test_defect_handling:test_invalid_chars_in_base64_payload
    def test_broken_base64_payload(self):
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         (b'\x03\x00\xe9\xd0\xfe\xff\xff.\x8b\xc0'
                          b'\xa1\x00p\xf6\xbf\xe9\x0f'))
        self.assertIsInstance(msg.defects[0],
                              errors.InvalidBase64CharactersDefect)

    def test_broken_unicode_payload(self):
        # This test improves coverage but is not a compliance test.
        # The behavior in this situation is currently undefined by the API.
        x = 'this is a br\xf6ken thing to do'
        msg = Message()
        msg['content-type'] = 'text/plain'
        msg['content-transfer-encoding'] = '8bit'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    def test_questionable_bytes_payload(self):
        # This test improves coverage but is not a compliance test,
        # since it involves poking inside the black box.
        x = 'this is a quéstionable thing to do'.encode('utf-8')
        msg = Message()
        msg['content-type'] = 'text/plain; charset="utf-8"'
        msg['content-transfer-encoding'] = '8bit'
        msg._payload = x
        self.assertEqual(msg.get_payload(decode=True), x)

    # Issue 1078919
    def test_ascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    def test_noascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer.ppt")
        self.assertEqual(
            'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
            msg['Content-Disposition'])

    def test_nonascii_add_header_via_triple(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fußballer.ppt'))
        self.assertEqual(
            'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
            msg['Content-Disposition'])

    def test_ascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="windows [filename].ppt")
        self.assertEqual(
            'attachment; filename="windows [filename].ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer [filename].ppt")
        self.assertEqual(
            "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
            msg['Content-Disposition'])

    def test_add_header_with_name_only_param(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'inline', foo_bar=None)
        self.assertEqual("inline; foo-bar", msg['Content-Disposition'])

    def test_add_header_with_no_value(self):
        msg = Message()
        msg.add_header('X-Status', None)
        self.assertEqual('', msg['X-Status'])

    # Issue 5871: reject an attempt to embed a header inside a header value
    # (header injection attack).
    def test_embeded_header_via_Header_rejected(self):
        msg = Message()
        msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_embeded_header_via_string_rejected(self):
        msg = Message()
        msg['Dummy'] = 'dummy\nX-Injected-Header: test'
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_unicode_header_defaults_to_utf8_encoding(self):
        # Issue 14291
        m = MIMEText('abc\n')
        m['Subject'] = 'É test'
        self.assertEqual(str(m), textwrap.dedent("""\
            Content-Type: text/plain; charset="us-ascii"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 7bit
            Subject: =?utf-8?q?=C3=89_test?=

            abc
            """))

    def test_unicode_body_defaults_to_utf8_encoding(self):
        # Issue 14291
        m = MIMEText('É testabc\n')
        self.assertEqual(str(m), textwrap.dedent("""\
            Content-Type: text/plain; charset="utf-8"
            MIME-Version: 1.0
            Content-Transfer-Encoding: base64

            w4kgdGVzdGFiYwo=
            """))
641
642
# Test the email.encoders module
class TestEncoders(unittest.TestCase):

    def test_EncodersEncode_base64(self):
        with openfile('PyBanner048.gif', 'rb') as fp:
            bindata = fp.read()
        mimed = email.mime.image.MIMEImage(bindata)
        base64ed = mimed.get_payload()
        # the transfer-encoded body lines should all be <=76 characters
        lines = base64ed.split('\n')
        self.assertLessEqual(max(len(x) for x in lines), 76)

    def test_encode_empty_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg['content-transfer-encoding'], '7bit')

    def test_default_cte(self):
        eq = self.assertEqual
        # 7bit data and the default us-ascii _charset
        msg = MIMEText('hello world')
        eq(msg['content-transfer-encoding'], '7bit')
        # Similar, but with 8bit data
        msg = MIMEText('hello \xf8 world')
        eq(msg['content-transfer-encoding'], 'base64')
        # And now with a different charset
        msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')

    def test_encode7or8bit(self):
        # Make sure a charset whose input character set is 8bit but
        # whose output character set is 7bit gets a transfer-encoding
        # of 7bit.
        eq = self.assertEqual
        msg = MIMEText('文', _charset='euc-jp')
        eq(msg['content-transfer-encoding'], '7bit')

    def test_qp_encode_latin1(self):
        msg = MIMEText('\xe1\xf6\n', 'text', 'ISO-8859-1')
        self.assertEqual(str(msg), textwrap.dedent("""\
            MIME-Version: 1.0
            Content-Type: text/text; charset="iso-8859-1"
            Content-Transfer-Encoding: quoted-printable

            =E1=F6
            """))

    def test_qp_encode_non_latin1(self):
        # Issue 16948
        msg = MIMEText('\u017c\n', 'text', 'ISO-8859-2')
        self.assertEqual(str(msg), textwrap.dedent("""\
            MIME-Version: 1.0
            Content-Type: text/text; charset="iso-8859-2"
            Content-Transfer-Encoding: quoted-printable

            =BF
            """))
Ezio Melottib3aedd42010-11-20 19:04:17 +0000702
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000703# Test long header wrapping
class TestLongHeaders(TestEmailBase):
    """Exercise folding and RFC 2047 encoding of over-long header values.

    Expected values are spelled out as literal strings, so every blank, tab
    and line break inside those literals is significant.
    """

    maxDiff = None

    def _flatten_to_str(self, msg):
        # Serialize msg through a default-configured Generator and return
        # the resulting text.
        buf = StringIO()
        Generator(buf).flatten(msg)
        return buf.getvalue()

    def test_split_long_continuation(self):
        # A continuation line with no split point must survive a
        # parse/flatten round trip unchanged.
        text = """\
Subject: bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text

test
"""
        msg = email.message_from_string(text)
        self.ndiffAssertEqual(self._flatten_to_str(msg), text)

    def test_another_long_almost_unsplittable_header(self):
        # The value already contains its own line breaks; encode() must
        # hand it back untouched, for tab and for blank continuations.
        tabbed = """\
bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text"""
        h = Header(tabbed, continuation_ws='\t')
        self.ndiffAssertEqual(h.encode(), tabbed)
        spaced = tabbed.replace('\t', ' ')
        h = Header(spaced)
        self.ndiffAssertEqual(h.encode(), spaced)

    def test_long_nonstring(self):
        # One header assembled from three chunks in three charsets.
        latin1 = Charset("iso-8859-1")
        latin2 = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
                  b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
                  b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
                  b'bef\xf6rdert. ')
        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        h = Header(g_head, latin1, header_name='Subject')
        h.append(cz_head, latin2)
        h.append(utf8_head, utf8)
        msg = Message()
        msg['Subject'] = h
        self.ndiffAssertEqual(self._flatten_to_str(msg), """\
Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
 =?utf-8?b?44CC?=

""")
        # Encoding the bare value (no "Subject: " prefix written) with an
        # explicit line length folds at different points.
        self.ndiffAssertEqual(h.encode(maxlinelen=76), """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")

    def test_long_header_encode(self):
        h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
                   'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
                   header_name='X-Foobar-Spoink-Defrobnit')
        self.ndiffAssertEqual(h.encode(), '''\
wasnipoop; giraffes="very-long-necked-animals";
 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')

    def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
        # The value contains only blanks, so the fold keeps a blank even
        # though continuation_ws asks for a tab.
        h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
                   'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
                   header_name='X-Foobar-Spoink-Defrobnit',
                   continuation_ws='\t')
        self.ndiffAssertEqual(h.encode(), '''\
wasnipoop; giraffes="very-long-necked-animals";
 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')

    def test_long_header_encode_with_tab_continuation(self):
        # Here the value itself has a tab at the fold point, and the tab
        # is what starts the continuation line.
        h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
                   'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
                   header_name='X-Foobar-Spoink-Defrobnit',
                   continuation_ws='\t')
        self.ndiffAssertEqual(h.encode(), '''\
wasnipoop; giraffes="very-long-necked-animals";
\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')

    def test_header_encode_with_different_output_charset(self):
        # euc-jp input is emitted as iso-2022-jp.
        h = Header('文', 'euc-jp')
        self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=")

    def test_long_header_encode_with_different_output_charset(self):
        h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4'
                   b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4'
                   b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4'
                   b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp')
        res = """\
=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?=
 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?="""
        self.assertEqual(h.encode(), res)

    def test_header_splitter(self):
        msg = MIMEText('')
        # It'd be great if we could use add_header() here, but that doesn't
        # guarantee an order of the parameters.
        msg['X-Foobar-Spoink-Defrobnit'] = (
            'wasnipoop; giraffes="very-long-necked-animals"; '
            'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
        self.ndiffAssertEqual(self._flatten_to_str(msg), '''\
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"

''')

    def test_no_semis_header_splitter(self):
        msg = Message()
        msg['From'] = 'test@dom.ain'
        msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
        msg.set_payload('Test')
        self.ndiffAssertEqual(self._flatten_to_str(msg), """\
From: test@dom.ain
References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>

Test""")

    def test_last_split_chunk_does_not_fit(self):
        h = Header('Subject: the first part of this is short, but_the_second'
                   '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
                   '_all_by_itself')
        self.ndiffAssertEqual(h.encode(), """\
Subject: the first part of this is short,
 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")

    def test_splittable_leading_char_followed_by_overlong_unsplitable(self):
        h = Header(', but_the_second'
                   '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
                   '_all_by_itself')
        self.ndiffAssertEqual(h.encode(), """\
,
 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")

    def test_multiple_splittable_leading_char_followed_by_overlong_unsplitable(self):
        h = Header(', , but_the_second'
                   '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
                   '_all_by_itself')
        self.ndiffAssertEqual(h.encode(), """\
, ,
 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")

    def test_trailing_splitable_on_overlong_unsplitable(self):
        h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
                   'be_on_a_line_all_by_itself;')
        self.ndiffAssertEqual(
            h.encode(),
            "this_part_does_not_fit_within_maxlinelen_and_thus_should_"
            "be_on_a_line_all_by_itself;")

    def test_trailing_splitable_on_overlong_unsplitable_with_leading_splitable(self):
        h = Header('; '
                   'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
                   'be_on_a_line_all_by_itself; ')
        self.ndiffAssertEqual(h.encode(), """\
;
 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)

    def test_long_header_with_multiple_sequential_split_chars(self):
        # The runs of two blanks are the point of this test: they must be
        # carried through the fold without losing any text.
        h = Header('This is a long line that has two whitespaces  in a row.  '
                   'This used to cause truncation of the header when folded')
        self.ndiffAssertEqual(h.encode(), """\
This is a long line that has two whitespaces  in a row.  This used to cause
 truncation of the header when folded""")

    def test_splitter_split_on_punctuation_only_if_fws_with_header(self):
        # Punctuation with no whitespace after it is not a fold point.
        h = Header('thisverylongheaderhas;semicolons;and,commas,but'
                   'they;arenotlegal;fold,points')
        self.ndiffAssertEqual(
            h.encode(),
            "thisverylongheaderhas;semicolons;and,commas,butthey;"
            "arenotlegal;fold,points")

    def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self):
        h = Header('this is a test where we need to have more than one line '
                   'before; our final line that is just too big to fit;; '
                   'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
                   'be_on_a_line_all_by_itself;')
        self.ndiffAssertEqual(h.encode(), """\
this is a test where we need to have more than one line before;
 our final line that is just too big to fit;;
 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""")

    def test_overlong_last_part_followed_by_split_point(self):
        h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
                   'be_on_a_line_all_by_itself ')
        self.ndiffAssertEqual(
            h.encode(),
            "this_part_does_not_fit_within_maxlinelen_and_thus_"
            "should_be_on_a_line_all_by_itself ")

    def test_multiline_with_overlong_parts_separated_by_two_split_points(self):
        h = Header('this_is_a__test_where_we_need_to_have_more_than_one_line_'
                   'before_our_final_line_; ; '
                   'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
                   'be_on_a_line_all_by_itself; ')
        self.ndiffAssertEqual(h.encode(), """\
this_is_a__test_where_we_need_to_have_more_than_one_line_before_our_final_line_;
 ;
 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)

    def test_multiline_with_overlong_last_part_followed_by_split_point(self):
        h = Header('this is a test where we need to have more than one line '
                   'before our final line; ; '
                   'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
                   'be_on_a_line_all_by_itself; ')
        self.ndiffAssertEqual(h.encode(), """\
this is a test where we need to have more than one line before our final line;
 ;
 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)

    def test_long_header_with_whitespace_runs(self):
        msg = Message()
        msg['From'] = 'test@dom.ain'
        # Each element carries two trailing blanks; joined with SPACE that
        # gives runs of three blanks between ids and two at the very end.
        msg['References'] = SPACE.join(['<foo@dom.ain>  '] * 10)
        msg.set_payload('Test')
        self.ndiffAssertEqual(self._flatten_to_str(msg), """\
From: test@dom.ain
References: <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>
   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>
   <foo@dom.ain>   <foo@dom.ain>\x20\x20

Test""")

    def test_long_run_with_semi_header_splitter(self):
        msg = Message()
        msg['From'] = 'test@dom.ain'
        msg['References'] = SPACE.join(['<foo@dom.ain>'] * 10) + '; abc'
        msg.set_payload('Test')
        self.ndiffAssertEqual(self._flatten_to_str(msg), """\
From: test@dom.ain
References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
 <foo@dom.ain>; abc

Test""")

    def test_splitter_split_on_punctuation_only_if_fws(self):
        msg = Message()
        msg['From'] = 'test@dom.ain'
        msg['References'] = ('thisverylongheaderhas;semicolons;and,commas,but'
                             'they;arenotlegal;fold,points')
        msg.set_payload('Test')
        # XXX the space after the header should not be there.
        self.ndiffAssertEqual(self._flatten_to_str(msg), """\
From: test@dom.ain
References:\x20
 thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points

Test""")

    def test_no_split_long_header(self):
        run = 'x' * 80
        h = Header('References: ' + run)
        # These come on two lines because Headers are really field value
        # classes and don't really know about their field names.
        self.ndiffAssertEqual(h.encode(), 'References:\n ' + run)
        h = Header(run)
        self.ndiffAssertEqual(h.encode(), run)

    def test_splitting_multiple_long_lines(self):
        hstr = """\
from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
"""
        h = Header(hstr, continuation_ws='\t')
        # Existing tab-led lines keep their tab; newly created folds use
        # the blank that was already in the text.
        self.ndiffAssertEqual(h.encode(), """\
from babylon.socal-raves.org (localhost [127.0.0.1]);
 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
 for <mailman-admin@babylon.socal-raves.org>;
 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
 for <mailman-admin@babylon.socal-raves.org>;
 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
 for <mailman-admin@babylon.socal-raves.org>;
 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")

    def test_splitting_first_line_only_is_long(self):
        hstr = """\
from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
\tid 17k4h5-00034i-00
\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
        h = Header(hstr, maxlinelen=78, header_name='Received',
                   continuation_ws='\t')
        self.ndiffAssertEqual(h.encode(), """\
from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
 helo=cthulhu.gerg.ca)
\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
\tid 17k4h5-00034i-00
\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")

    def test_long_8bit_header(self):
        msg = Message()
        h = Header('Britische Regierung gibt', 'iso-8859-1',
                   header_name='Subject')
        h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
        self.ndiffAssertEqual(h.encode(maxlinelen=76), """\
=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
        msg['Subject'] = h
        self.ndiffAssertEqual(msg.as_string(maxheaderlen=76), """\
Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
 =?iso-8859-1?q?hore-Windkraftprojekte?=

""")
        # maxheaderlen=0 yields a single unfolded encoded word.
        self.ndiffAssertEqual(msg.as_string(maxheaderlen=0), """\
Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=

""")

    def test_long_8bit_header_no_charset(self):
        header_string = ('Britische Regierung gibt gr\xfcnes Licht '
                         'f\xfcr Offshore-Windkraftprojekte '
                         '<a-very-long-address@example.com>')
        expected = """\
Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=

"""
        # Assigning the plain string and assigning an explicit Header must
        # serialize identically.
        msg = Message()
        msg['Reply-To'] = header_string
        self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), expected)
        msg = Message()
        msg['Reply-To'] = Header(header_string,
                                 header_name='Reply-To')
        self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), expected)

    def test_long_to_header(self):
        to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
              '<someone@eecs.umich.edu>, '
              '"Someone Test #B" <someone@umich.edu>, '
              '"Someone Test #C" <someone@eecs.umich.edu>, '
              '"Someone Test #D" <someone@eecs.umich.edu>')
        msg = Message()
        msg['To'] = to
        self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), '''\
To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
 "Someone Test #B" <someone@umich.edu>,
 "Someone Test #C" <someone@eecs.umich.edu>,
 "Someone Test #D" <someone@eecs.umich.edu>

''')

    def test_long_line_after_append(self):
        s = 'This is an example of string which has almost the limit of header length.'
        h = Header(s)
        h.append('Add another line.')
        self.ndiffAssertEqual(h.encode(maxlinelen=76), """\
This is an example of string which has almost the limit of header length.
 Add another line.""")

    def test_shorter_line_with_append(self):
        s = 'This is a shorter line.'
        h = Header(s)
        h.append('Add another sentence. (Surprise?)')
        self.ndiffAssertEqual(
            h.encode(),
            'This is a shorter line. Add another sentence. (Surprise?)')

    def test_long_field_name(self):
        fn = 'X-Very-Very-Very-Long-Header-Name'
        gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
              'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
              'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
              'bef\xf6rdert. ')
        h = Header(gs, 'iso-8859-1', header_name=fn)
        # BAW: this seems broken because the first line is too long
        self.ndiffAssertEqual(h.encode(maxlinelen=76), """\
=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")

    def test_long_received_header(self):
        h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
             'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
             'Wed, 05 Mar 2003 18:10:18 -0700')
        msg = Message()
        msg['Received-1'] = Header(h, continuation_ws='\t')
        msg['Received-2'] = h
        # This should be splitting on spaces not semicolons.
        self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
 Wed, 05 Mar 2003 18:10:18 -0700
Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
 Wed, 05 Mar 2003 18:10:18 -0700

""")

    def test_string_headerinst_eq(self):
        h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
             'tu-muenchen.de> (David Bremner\'s message of '
             '"Thu, 6 Mar 2003 13:58:21 +0100")')
        msg = Message()
        msg['Received-1'] = Header(h, header_name='Received-1',
                                   continuation_ws='\t')
        msg['Received-2'] = h
        # XXX The space after the ':' should not be there.
        self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Received-1:\x20
 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
Received-2:\x20
 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")

""")

    def test_long_unbreakable_lines_with_continuation(self):
        msg = Message()
        t = """\
iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
        msg['Face-1'] = t
        msg['Face-2'] = Header(t, header_name='Face-2')
        msg['Face-3'] = ' ' + t
        # XXX This splitting is all wrong.  The first value line should be
        # snug against the field name or the space after the header not there.
        self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Face-1:\x20
 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Face-2:\x20
 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Face-3:\x20
 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp

""")

    def test_another_long_multiline_header(self):
        m = ('Received: from siimage.com '
             '([172.25.1.3]) by zima.siliconimage.com with '
             'Microsoft SMTPSVC(5.0.2195.4905); '
             'Wed, 16 Oct 2002 07:41:11 -0700')
        msg = email.message_from_string(m)
        self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), '''\
Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with
 Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 2002 07:41:11 -0700

''')

    def test_long_lines_with_different_header(self):
        h = ('List-Unsubscribe: '
             '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
             '        <mailto:spamassassin-talk-request@lists.sourceforge.net'
             '?subject=unsubscribe>')
        msg = Message()
        # The same value is stored twice: once as a str, once as a Header.
        msg['List'] = h
        msg['List'] = Header(h, header_name='List')
        self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
List: List-Unsubscribe:
 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
        <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
List: List-Unsubscribe:
 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
        <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>

""")

    def test_long_rfc2047_header_with_embedded_fws(self):
        # The second and third source lines start with whitespace (a tab,
        # then blanks), i.e. the value already contains folding white space.
        h = Header(textwrap.dedent("""\
            We're going to pretend this header is in a non-ascii character set
            \tto see if line wrapping with encoded words and embedded
               folding white space works"""),
                   charset='utf-8',
                   header_name='Test')
        self.assertEqual(h.encode()+'\n', textwrap.dedent("""\
            =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
             =?utf-8?q?cter_set?=
             =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
             =?utf-8?q?_folding_white_space_works?=""")+'\n')
1273
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001274
Ezio Melottib3aedd42010-11-20 19:04:17 +00001275
# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    """Check the generators' mangle_from_ handling of body "From " lines."""

    def setUp(self):
        # A one-header message whose body starts with "From ".
        message = Message()
        message['From'] = 'aaa@bbb.org'
        message.set_payload("""\
From the desk of A.A.A.:
Blah blah blah
""")
        self.msg = message

    def test_mangled_from(self):
        # With mangling on, the leading body line gains a ">" prefix.
        out = StringIO()
        Generator(out, mangle_from_=True).flatten(self.msg)
        self.assertEqual(out.getvalue(), """\
From: aaa@bbb.org

>From the desk of A.A.A.:
Blah blah blah
""")

    def test_dont_mangle_from(self):
        # With mangling off, the body is written verbatim.
        out = StringIO()
        Generator(out, mangle_from_=False).flatten(self.msg)
        self.assertEqual(out.getvalue(), """\
From: aaa@bbb.org

From the desk of A.A.A.:
Blah blah blah
""")

    def test_mangle_from_in_preamble_and_epilog(self):
        # Both the preamble and the epilogue of the multipart carry a
        # "From " line, so two mangled lines are expected in the output.
        source = textwrap.dedent("""\
            From: foo@bar.com
            Mime-Version: 1.0
            Content-Type: multipart/mixed; boundary=XXX

            From somewhere unknown

            --XXX
            Content-Type: text/plain

            foo

            --XXX--

            From somewhere unknowable
            """)
        out = StringIO()
        generator = Generator(out, mangle_from_=True)
        generator.flatten(email.message_from_string(source))
        mangled = [line for line in out.getvalue().split('\n')
                   if line.startswith('>From ')]
        self.assertEqual(len(mangled), 2)

    def test_mangled_from_with_bad_bytes(self):
        # Non-ASCII bytes on the "From " line must come through the bytes
        # generator unchanged apart from the ">" prefix.
        header_block = textwrap.dedent("""\
            Content-Type: text/plain; charset="utf-8"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 8bit
            From: aaa@bbb.org

            """).encode('utf-8')
        parsed = email.message_from_bytes(header_block + b'From R\xc3\xb6lli\n')
        out = BytesIO()
        BytesGenerator(out, mangle_from_=True).flatten(parsed)
        self.assertEqual(out.getvalue(), header_block + b'>From R\xc3\xb6lli\n')
1344
Ezio Melottib3aedd42010-11-20 19:04:17 +00001345
# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    """Basic checks of MIMEAudio built from the audiotest.au sample file."""

    def setUp(self):
        # Read the raw sample once per test and wrap it without naming a
        # subtype, so the class has to work it out from the data.
        with openfile('audiotest.au', 'rb') as fp:
            self._audiodata = fp.read()
        self._au = MIMEAudio(self._audiodata)

    def test_guess_minor_type(self):
        self.assertEqual(self._au.get_content_type(), 'audio/basic')

    def test_encoding(self):
        # The payload is base64 text that decodes back to the input bytes.
        payload = self._au.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._audiodata)

    def test_checkSetMinor(self):
        # An explicit subtype is used as given.
        au = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(au.get_content_type(), 'audio/fish')

    def test_add_header(self):
        eq = self.assertEqual
        au = self._au
        au.add_header('Content-Disposition', 'attachment',
                      filename='audiotest.au')
        eq(au['content-disposition'],
           'attachment; filename="audiotest.au"')
        eq(au.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'audiotest.au')])
        eq(au.get_param('filename', header='content-disposition'),
           'audiotest.au')
        # Unique sentinel so identity proves failobj itself came back.
        missing = []
        eq(au.get_param('attachment', header='content-disposition'), '')
        self.assertIs(au.get_param('foo', failobj=missing,
                                   header='content-disposition'), missing)
        # Try some missing stuff
        self.assertIs(au.get_param('foobar', missing), missing)
        self.assertIs(au.get_param('attachment', missing,
                                   header='foobar'), missing)
1384
1385
Ezio Melottib3aedd42010-11-20 19:04:17 +00001386
# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    """Basic checks of MIMEImage built from the PyBanner048.gif sample."""

    def setUp(self):
        # Read the raw sample once per test and wrap it without naming a
        # subtype, so the class has to work it out from the data.
        with openfile('PyBanner048.gif', 'rb') as fp:
            self._imgdata = fp.read()
        self._im = MIMEImage(self._imgdata)

    def test_guess_minor_type(self):
        self.assertEqual(self._im.get_content_type(), 'image/gif')

    def test_encoding(self):
        # The payload is base64 text that decodes back to the input bytes.
        payload = self._im.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._imgdata)

    def test_checkSetMinor(self):
        # An explicit subtype is used as given.
        im = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(im.get_content_type(), 'image/fish')

    def test_add_header(self):
        eq = self.assertEqual
        im = self._im
        im.add_header('Content-Disposition', 'attachment',
                      filename='dingusfish.gif')
        eq(im['content-disposition'],
           'attachment; filename="dingusfish.gif"')
        eq(im.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'dingusfish.gif')])
        eq(im.get_param('filename', header='content-disposition'),
           'dingusfish.gif')
        # Unique sentinel so identity proves failobj itself came back.
        missing = []
        eq(im.get_param('attachment', header='content-disposition'), '')
        self.assertIs(im.get_param('foo', failobj=missing,
                                   header='content-disposition'), missing)
        # Try some missing stuff
        self.assertIs(im.get_param('foobar', missing), missing)
        self.assertIs(im.get_param('attachment', missing,
                                   header='foobar'), missing)
1425
1426
Ezio Melottib3aedd42010-11-20 19:04:17 +00001427
# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    """Checks on MIMEApplication with the default and explicit encoders."""

    def test_headers(self):
        msg = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        self.assertEqual(msg.get_content_type(), 'application/octet-stream')
        self.assertEqual(msg['content-transfer-encoding'], 'base64')

    def test_body(self):
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata)
        # whitespace in the cte encoded block is RFC-irrelevant.
        self.assertEqual(msg.get_payload().strip(), '+vv8/f7/')
        self.assertEqual(msg.get_payload(decode=True), bytesdata)

    def test_binary_body_with_encode_7or8bit(self):
        # Issue 17171.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_7or8bit)
        # Treated as a string, this will be invalid code points.
        self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        self.assertEqual(msg['Content-Transfer-Encoding'], '8bit')
        # Round-trip through the bytes generator and parser.
        s = BytesIO()
        g = BytesGenerator(s)
        g.flatten(msg)
        wireform = s.getvalue()
        msg2 = email.message_from_bytes(wireform)
        # NOTE(review): this re-checks msg, not msg2 -- possibly meant to
        # verify the round-tripped message; confirm before changing.
        self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
        self.assertEqual(msg2['Content-Transfer-Encoding'], '8bit')

    def test_binary_body_with_encode_noop(self):
        # Issue 16564: This does not produce an RFC valid message, since to be
        # valid it should have a CTE of binary.  But the below works in
        # Python2, and is documented as working this way.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_noop)
        # Treated as a string, this will be invalid code points.
        self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        s = BytesIO()
        g = BytesGenerator(s)
        g.flatten(msg)
        wireform = s.getvalue()
        msg2 = email.message_from_bytes(wireform)
        # NOTE(review): re-checks msg rather than msg2; see 7or8bit test.
        self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)

    def test_binary_body_with_encode_quopri(self):
        # Issue 14360.
        # The trailing space is part of the data and must be encoded (=20).
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff '
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_quopri)
        self.assertEqual(msg.get_payload(), '=FA=FB=FC=FD=FE=FF=20')
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        self.assertEqual(msg['Content-Transfer-Encoding'], 'quoted-printable')
        s = BytesIO()
        g = BytesGenerator(s)
        g.flatten(msg)
        wireform = s.getvalue()
        msg2 = email.message_from_bytes(wireform)
        # NOTE(review): re-checks msg rather than msg2; see 7or8bit test.
        self.assertEqual(msg.get_payload(), '=FA=FB=FC=FD=FE=FF=20')
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
        self.assertEqual(msg2['Content-Transfer-Encoding'], 'quoted-printable')

    def test_binary_body_with_encode_base64(self):
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_base64)
        self.assertEqual(msg.get_payload(), '+vv8/f7/\n')
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        s = BytesIO()
        g = BytesGenerator(s)
        g.flatten(msg)
        wireform = s.getvalue()
        msg2 = email.message_from_bytes(wireform)
        # NOTE(review): re-checks msg rather than msg2; see 7or8bit test.
        self.assertEqual(msg.get_payload(), '+vv8/f7/\n')
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
1506
Ezio Melottib3aedd42010-11-20 19:04:17 +00001507
# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    """Checks on MIMEText content type, payload and charset handling."""

    def setUp(self):
        self._msg = MIMEText('hello there')

    def test_types(self):
        eq = self.assertEqual
        eq(self._msg.get_content_type(), 'text/plain')
        eq(self._msg.get_param('charset'), 'us-ascii')
        # A unique sentinel distinguishes "absent" from any real value.
        missing = []
        self.assertIs(self._msg.get_param('foobar', missing), missing)
        self.assertIs(
            self._msg.get_param('charset', missing, header='foobar'),
            missing)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there')
        eq(msg.get_charset(), 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        self.assertIn('hello there', msg.as_string())

    def test_utf8_input(self):
        # Cyrillic sample text, deliberately outside ASCII.
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        eq = self.assertEqual
        msg = MIMEText(teststr, _charset='utf-8')
        eq(msg.get_charset().output_charset, 'utf-8')
        eq(msg['content-type'], 'text/plain; charset="utf-8"')
        eq(msg.get_payload(decode=True), teststr.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
                   "will fix in email6")
    def test_utf8_input_no_charset(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
1559
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001560
Ezio Melottib3aedd42010-11-20 19:04:17 +00001561
# Test complicated multipart/* messages
class TestMultipart(TestEmailBase):
    """Generation and parsing of multipart/* messages, with emphasis on
    preamble, epilogue and boundary edge cases."""

    def setUp(self):
        # Build a two-part multipart/mixed message (text + GIF attachment)
        # and keep references to the container and both parts.
        with openfile('PyBanner048.gif', 'rb') as fp:
            data = fp.read()
        container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
        image = MIMEImage(data, name='dingusfish.gif')
        image.add_header('content-disposition', 'attachment',
                         filename='dingusfish.gif')
        intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
        container.attach(intro)
        container.attach(image)
        container['From'] = 'Barry <barry@digicool.com>'
        container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
        container['Subject'] = 'Here is your dingus fish'

        now = 987809702.54848599
        timetuple = time.localtime(now)
        # Pick the offset that applies at `now`: standard vs. DST.
        if timetuple.tm_isdst == 0:
            tzsecs = time.timezone
        else:
            tzsecs = time.altzone
        # time.timezone/altzone are seconds *west* of UTC, so a positive
        # value means a negative RFC 2822 offset.
        sign = '-' if tzsecs > 0 else '+'
        # Format the magnitude as HHMM.  Dividing the signed seconds by 36
        # mis-renders zones east of UTC and non-whole-hour offsets.
        hours, remainder = divmod(abs(tzsecs), 3600)
        tzoffset = ' %s%02d%02d' % (sign, hours, remainder // 60)
        container['Date'] = time.strftime(
            '%a, %d %b %Y %H:%M:%S', timetuple) + tzoffset
        self._msg = container
        self._im = image
        self._txt = intro

    def test_hierarchy(self):
        eq = self.assertEqual
        m = self._msg
        self.assertTrue(m.is_multipart())
        eq(m.get_content_type(), 'multipart/mixed')
        eq(len(m.get_payload()), 2)
        self.assertRaises(IndexError, m.get_payload, 2)
        m0 = m.get_payload(0)
        m1 = m.get_payload(1)
        # The parts must be the very objects that were attached.
        self.assertIs(m0, self._txt)
        self.assertIs(m1, self._im)
        eq(m.get_payload(), [m0, m1])
        self.assertFalse(m0.is_multipart())
        self.assertFalse(m1.is_multipart())

    def test_empty_multipart_idempotent(self):
        text = """\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY


--BOUNDARY--
"""
        msg = Parser().parsestr(text)
        self.ndiffAssertEqual(text, msg.as_string())

    def test_no_parts_in_a_multipart_with_none_epilogue(self):
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY

--BOUNDARY--''')

    def test_no_parts_in_a_multipart_with_empty_epilogue(self):
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = ''
        outer.epilogue = ''
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY

--BOUNDARY--
''')

    def test_one_part_in_a_multipart(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.set_boundary('BOUNDARY')
        msg = MIMEText('hello world')
        outer.attach(msg)
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_preamble(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_preamble(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = '\n'
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--

''')

    def test_message_external_body(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_36.txt')
        eq(len(msg.get_payload()), 2)
        msg1 = msg.get_payload(1)
        eq(msg1.get_content_type(), 'multipart/alternative')
        eq(len(msg1.get_payload()), 2)
        for subpart in msg1.get_payload():
            eq(subpart.get_content_type(), 'message/external-body')
            eq(len(subpart.get_payload()), 1)
            subsubpart = subpart.get_payload(0)
            eq(subsubpart.get_content_type(), 'text/plain')

    def test_double_boundary(self):
        # msg_37.txt is a multipart that contains two dash-boundary's in a
        # row.  Our interpretation of RFC 2046 calls for ignoring the second
        # and subsequent boundaries.
        msg = self._msgobj('msg_37.txt')
        self.assertEqual(len(msg.get_payload()), 3)

    def test_nested_inner_contains_outer_boundary(self):
        eq = self.ndiffAssertEqual
        # msg_38.txt has an inner part that contains outer boundaries.  My
        # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
        # these are illegal and should be interpreted as unterminated inner
        # parts.
        msg = self._msgobj('msg_38.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # _structure() indents each nesting level by four spaces.
        # NOTE(review): nesting depths below were reconstructed; verify
        # against msg_38.txt.
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
            text/plain
        text/plain
    text/plain
    text/plain
""")

    def test_nested_with_same_boundary(self):
        eq = self.ndiffAssertEqual
        # msg 39.txt is similarly evil in that it's got inner parts that use
        # the same boundary as outer parts.  Again, I believe the way this is
        # parsed is closest to the spirit of RFC 2046
        msg = self._msgobj('msg_39.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): nesting depths below were reconstructed; verify
        # against msg_39.txt.
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
        application/octet-stream
        application/octet-stream
    text/plain
""")

    def test_boundary_in_non_multipart(self):
        # A boundary parameter on a non-multipart type must not trigger
        # multipart parsing; the body round-trips as plain text.
        msg = self._msgobj('msg_40.txt')
        self.assertEqual(msg.as_string(), '''\
MIME-Version: 1.0
Content-Type: text/html; boundary="--961284236552522269"

----961284236552522269
Content-Type: text/html;
Content-Transfer-Encoding: 7Bit

<html></html>

----961284236552522269--
''')

    def test_boundary_with_leading_space(self):
        eq = self.assertEqual
        msg = email.message_from_string('''\
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary=" XXXX"

-- XXXX
Content-Type: text/plain


-- XXXX
Content-Type: text/plain

-- XXXX--
''')
        self.assertTrue(msg.is_multipart())
        eq(msg.get_boundary(), ' XXXX')
        eq(len(msg.get_payload()), 2)

    def test_boundary_without_trailing_newline(self):
        # The closing boundary is the very last thing in the input.
        m = Parser().parsestr("""\
Content-Type: multipart/mixed; boundary="===============0012394164=="
MIME-Version: 1.0

--===============0012394164==
Content-Type: image/file1.jpg
MIME-Version: 1.0
Content-Transfer-Encoding: base64

YXNkZg==
--===============0012394164==--""")
        self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001934
1935
Ezio Melottib3aedd42010-11-20 19:04:17 +00001936
# Test some badly formatted messages
class TestNonConformant(TestEmailBase):
    """Parsing of malformed messages and the defects recorded for them."""

    def test_parse_missing_minor_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_14.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')

    # test_defect_handling
    def test_same_boundary_inner_outer(self):
        msg = self._msgobj('msg_15.txt')
        # XXX We can probably eventually do better
        inner = msg.get_payload(0)
        self.assertTrue(hasattr(inner, 'defects'))
        self.assertEqual(len(inner.defects), 1)
        self.assertIsInstance(inner.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    # test_defect_handling
    def test_multipart_no_boundary(self):
        msg = self._msgobj('msg_25.txt')
        # Without a boundary the body stays an unparsed string.
        self.assertIsInstance(msg.get_payload(), str)
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    # Template message; the {} slot takes extra top-level header text
    # (or '') via str.format().
    # NOTE(review): the relative indent of the folded boundary= line was
    # reconstructed; it only needs to be deeper than its neighbours.
    multipart_msg = textwrap.dedent("""\
        Date: Wed, 14 Nov 2007 12:56:23 GMT
        From: foo@bar.invalid
        To: foo@bar.invalid
        Subject: Content-Transfer-Encoding: base64 and multipart
        MIME-Version: 1.0
        Content-Type: multipart/mixed;
            boundary="===============3344438784458119861=="{}

        --===============3344438784458119861==
        Content-Type: text/plain

        Test message

        --===============3344438784458119861==
        Content-Type: application/octet-stream
        Content-Transfer-Encoding: base64

        YWJj

        --===============3344438784458119861==--
        """)

    # test_defect_handling
    def test_multipart_invalid_cte(self):
        msg = self._str_msg(
            self.multipart_msg.format("\nContent-Transfer-Encoding: base64"))
        self.assertEqual(len(msg.defects), 1)
        self.assertIsInstance(
            msg.defects[0],
            errors.InvalidMultipartContentTransferEncodingDefect)

    # test_defect_handling
    def test_multipart_no_cte_no_defect(self):
        msg = self._str_msg(self.multipart_msg.format(''))
        self.assertEqual(len(msg.defects), 0)

    # test_defect_handling
    def test_multipart_valid_cte_no_defect(self):
        # 'BINary' checks that the CTE value is matched case-insensitively.
        for cte in ('7bit', '8bit', 'BINary'):
            msg = self._str_msg(
                self.multipart_msg.format(
                    "\nContent-Transfer-Encoding: {}".format(cte)))
            self.assertEqual(len(msg.defects), 0)

    # test_headerregistry.TestContentTypeHeader invalid_1 and invalid_2.
    def test_invalid_content_type(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        msg = Message()
        # RFC 2045, $5.2 says invalid yields text/plain
        msg['Content-Type'] = 'text'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Clear the old value and try something /really/ invalid
        del msg['content-type']
        msg['Content-Type'] = 'foo'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Still, make sure that the message is idempotently generated
        s = StringIO()
        g = Generator(s)
        g.flatten(msg)
        neq(s.getvalue(), 'Content-Type: foo\n\n')

    def test_no_start_boundary(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_31.txt')
        eq(msg.get_payload(), """\
--BOUNDARY
Content-Type: text/plain

message 1

--BOUNDARY
Content-Type: text/plain

message 2

--BOUNDARY--
""")

    def test_no_separating_blank_line(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_35.txt')
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: here's something interesting

counter to RFC 2822, there's no separating newline here
""")

    # test_defect_handling
    def test_lying_multipart(self):
        msg = self._msgobj('msg_41.txt')
        self.assertTrue(hasattr(msg, 'defects'))
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    # test_defect_handling
    def test_missing_start_boundary(self):
        outer = self._msgobj('msg_42.txt')
        # The message structure is:
        #
        # multipart/mixed
        #    text/plain
        #    message/rfc822
        #        multipart/mixed [*]
        #
        # [*] This message is missing its start boundary
        bad = outer.get_payload(1).get_payload(0)
        self.assertEqual(len(bad.defects), 1)
        self.assertIsInstance(bad.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    # test_defect_handling
    def test_first_line_is_continuation_header(self):
        eq = self.assertEqual
        m = ' Line 1\nSubject: test\n\nbody'
        msg = email.message_from_string(m)
        eq(msg.keys(), ['Subject'])
        eq(msg.get_payload(), 'body')
        eq(len(msg.defects), 1)
        self.assertDefectsEqual(msg.defects,
                                [errors.FirstHeaderLineIsContinuationDefect])
        eq(msg.defects[0].line, ' Line 1\n')

    # test_defect_handling
    def test_missing_header_body_separator(self):
        # Our heuristic if we see a line that doesn't look like a header (no
        # leading whitespace but no ':') is to assume that the blank line that
        # separates the header from the body is missing, and to stop parsing
        # headers and start parsing the body.
        msg = self._str_msg('Subject: test\nnot a header\nTo: abc\n\nb\n')
        self.assertEqual(msg.keys(), ['Subject'])
        self.assertEqual(msg.get_payload(), 'not a header\nTo: abc\n\nb\n')
        self.assertDefectsEqual(msg.defects,
                                [errors.MissingHeaderBodySeparatorDefect])
2113
Ezio Melottib3aedd42010-11-20 19:04:17 +00002114
# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    """decode_header()/make_header() behaviour on RFC 2047 encoded words."""

    def test_rfc2047_multiline(self):
        eq = self.assertEqual
        # The second line starts with a space: it is a folded continuation.
        s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
        dh = decode_header(s)
        eq(dh, [
            (b'Re: ', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            (b' baz foo bar ', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
        header = make_header(dh)
        eq(str(header),
           'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""")

    def test_whitespace_keeper_unicode(self):
        eq = self.assertEqual
        s = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        dh = decode_header(s)
        eq(dh, [(b'Andr\xe9', 'iso-8859-1'),
                (b' Pirard <pirard@dom.ain>', None)])
        header = str(make_header(dh))
        eq(header, 'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_keeper_unicode_2(self):
        eq = self.assertEqual
        s = ('The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the '
             '=?iso-8859-1?b?bGF6eSBkb2c=?=')
        dh = decode_header(s)
        eq(dh, [(b'The ', None), (b'quick brown fox', 'iso-8859-1'),
                (b' jumped over the ', None), (b'lazy dog', 'iso-8859-1')])
        hu = str(make_header(dh))
        eq(hu, 'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        dh = decode_header(s)
        self.assertEqual(dh, [(b'Sm', None), (b'\xf6', 'iso-8859-1'),
                              (b'rg', None), (b'\xe5', 'iso-8859-1'),
                              (b'sbord', None)])

    def test_rfc2047_with_whitespace(self):
        s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        dh = decode_header(s)
        self.assertEqual(dh, [(b'Sm ', None), (b'\xf6', 'iso-8859-1'),
                              (b' rg ', None), (b'\xe5', 'iso-8859-1'),
                              (b' sbord', None)])

    def test_rfc2047_B_bad_padding(self):
        s = '=?iso-8859-1?B?%s?='
        data = [                                # only test complete bytes
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi')
        ]
        for q, a in data:
            dh = decode_header(s % q)
            self.assertEqual(dh, [(a, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004.
        s = '=?iso-8659-1?Q?andr=e9=zz?='
        self.assertEqual(decode_header(s),
                         [(b'andr\xe9=zz', 'iso-8659-1')])

    def test_rfc2047_rfc2047_1(self):
        # 1st testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_2(self):
        # 2nd testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?= b)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a', 'iso-8859-1'), (b' b)', None)])

    def test_rfc2047_rfc2047_3(self):
        # 3rd testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_4(self):
        # 4th testcase at end of rfc2047.  RFC 2047 section 8 separates the
        # two encoded words with *two* spaces here; with a single space this
        # test would merely duplicate the 3rd testcase.
        s = '(=?ISO-8859-1?Q?a?=  =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_5a(self):
        # 5th testcase at end of rfc2047 newline is \r\n
        s = '(=?ISO-8859-1?Q?a?=\r\n =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_5b(self):
        # 5th testcase at end of rfc2047 newline is \n
        s = '(=?ISO-8859-1?Q?a?=\n =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_6(self):
        # 6th testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a_b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a b', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_7(self):
        # 7th testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a', 'iso-8859-1'), (b' b', 'iso-8859-2'),
             (b')', None)])
        self.assertEqual(make_header(decode_header(s)).encode(), s.lower())
        self.assertEqual(str(make_header(decode_header(s))), '(a b)')

    def test_multiline_header(self):
        s = '=?windows-1252?q?=22M=FCller_T=22?=\r\n <T.Mueller@xxx.com>'
        self.assertEqual(decode_header(s),
                         [(b'"M\xfcller T"', 'windows-1252'),
                          (b'<T.Mueller@xxx.com>', None)])
        # Re-encoding yields the original header with the fold removed.
        self.assertEqual(make_header(decode_header(s)).encode(),
                         ''.join(s.splitlines()))
        self.assertEqual(str(make_header(decode_header(s))),
                         '"Müller T" <T.Mueller@xxx.com>')
2242
Ezio Melottib3aedd42010-11-20 19:04:17 +00002243
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002244# Test the MIMEMessage class
class TestMIMEMessage(TestEmailBase):
    """Tests for MIMEMessage and for message/* and multipart containers."""

    def setUp(self):
        # Keep the raw text of the msg_11.txt sample on the instance.
        with openfile('msg_11.txt') as fp:
            self._text = fp.read()

    def _two_text_parts_message(self, epilogue):
        # Build the multipart/mixed message shared by test_epilogue and
        # test_no_nl_preamble; only the epilogue differs between them.
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = epilogue
        first = MIMEText('One')
        second = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(first)
        msg.attach(second)
        return msg

    def _check_digest_default_types(self, filename):
        # Parse *filename* and check that its first two subparts are
        # message/rfc822 containers, each holding a text/plain message, for
        # both the default type and the effective content type.
        eq = self.assertEqual
        with openfile(filename) as fp:
            msg = email.message_from_file(fp)
        containers = [msg.get_payload(0), msg.get_payload(1)]
        for container in containers:
            eq(container.get_default_type(), 'message/rfc822')
            eq(container.get_content_type(), 'message/rfc822')
        for container in containers:
            enclosed = container.get_payload(0)
            eq(enclosed.get_default_type(), 'text/plain')
            eq(enclosed.get_content_type(), 'text/plain')

    def test_type_error(self):
        # MIMEMessage must reject an argument that is a plain str.
        with self.assertRaises(TypeError):
            MIMEMessage('a plain string')

    def test_valid_argument(self):
        eq = self.assertEqual
        subject = 'A sub-message'
        inner = Message()
        inner['Subject'] = subject
        wrapper = MIMEMessage(inner)
        eq(wrapper.get_content_type(), 'message/rfc822')
        payload = wrapper.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subpart = payload[0]
        # The wrapped message is stored as-is, not copied.
        self.assertIs(subpart, inner)
        eq(subpart['subject'], subject)

    def test_bad_multipart(self):
        # A MIMEMessage already holds its one sub-message; attaching another
        # must raise MultipartConversionError.
        first = Message()
        first['Subject'] = 'subpart 1'
        second = Message()
        second['Subject'] = 'subpart 2'
        wrapper = MIMEMessage(first)
        with self.assertRaises(errors.MultipartConversionError):
            wrapper.attach(second)

    def test_generate(self):
        # First craft the message to be encapsulated
        enclosed = Message()
        enclosed['Subject'] = 'An enclosed message'
        enclosed.set_payload('Here is the body of the message.\n')
        # Then wrap it and flatten the wrapper.
        wrapper = MIMEMessage(enclosed)
        wrapper['Subject'] = 'The enclosing message'
        out = StringIO()
        Generator(out).flatten(wrapper)
        self.assertEqual(out.getvalue(), """\
Content-Type: message/rfc822
MIME-Version: 1.0
Subject: The enclosing message

Subject: An enclosed message

Here is the body of the message.
""")

    def test_parse_message_rfc822(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_11.txt')
        eq(msg.get_content_type(), 'message/rfc822')
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        submsg = payload[0]
        self.assertIsInstance(submsg, Message)
        eq(submsg['subject'], 'An enclosed message')
        eq(submsg.get_payload(), 'Here is the body of the message.\n')

    def test_dsn(self):
        eq = self.assertEqual
        # msg 16 is a Delivery Status Notification, see RFC 1894
        msg = self._msgobj('msg_16.txt')
        eq(msg.get_content_type(), 'multipart/report')
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 3)
        # Subpart 1 is a text/plain, human readable section
        subpart = msg.get_payload(0)
        eq(subpart.get_content_type(), 'text/plain')
        # NOTE(review): the leading whitespace of the indented report lines
        # below may have been collapsed when this text was extracted; verify
        # the exact indentation against msg_16.txt.
        eq(subpart.get_payload(), """\
This report relates to a message you sent with the following header fields:

 Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
 Date: Sun, 23 Sep 2001 20:10:55 -0700
 From: "Ian T. Henry" <henryi@oxy.edu>
 To: SoCal Raves <scr@socal-raves.org>
 Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

 Recipient address: jangel1@cougar.noc.ucla.edu
 Reason: recipient reached disk quota

""")
        # Subpart 2 contains the machine parsable DSN information.  It
        # consists of two blocks of headers, represented by two nested Message
        # objects.
        subpart = msg.get_payload(1)
        eq(subpart.get_content_type(), 'message/delivery-status')
        eq(len(subpart.get_payload()), 2)
        # message/delivery-status should treat each block as a bunch of
        # headers, i.e. a bunch of Message objects.
        dsn1 = subpart.get_payload(0)
        self.assertIsInstance(dsn1, Message)
        eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
        eq(dsn1.get_param('dns', header='reporting-mta'), '')
        # Try a missing one <wink>
        self.assertIsNone(dsn1.get_param('nsd', header='reporting-mta'))
        dsn2 = subpart.get_payload(1)
        self.assertIsInstance(dsn2, Message)
        eq(dsn2['action'], 'failed')
        eq(dsn2.get_params(header='original-recipient'),
           [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
        eq(dsn2.get_param('rfc822', header='final-recipient'), '')
        # Subpart 3 is the original message
        subpart = msg.get_payload(2)
        eq(subpart.get_content_type(), 'message/rfc822')
        payload = subpart.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subsubpart = payload[0]
        self.assertIsInstance(subsubpart, Message)
        eq(subsubpart.get_content_type(), 'text/plain')
        eq(subsubpart['message-id'],
           '<002001c144a6$8752e060$56104586@oxy.edu>')

    def test_epilogue(self):
        # A message built by hand with a preamble and an epilogue must
        # flatten to exactly the text stored in msg_21.txt.
        with openfile('msg_21.txt') as fp:
            text = fp.read()
        msg = self._two_text_parts_message('End of MIME message\n')
        sfp = StringIO()
        Generator(sfp).flatten(msg)
        self.ndiffAssertEqual(sfp.getvalue(), text)

    def test_no_nl_preamble(self):
        # The preamble has no trailing newline and the epilogue is empty.
        msg = self._two_text_parts_message('')
        self.ndiffAssertEqual(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
""")

    def test_default_type(self):
        self._check_digest_default_types('msg_30.txt')

    def test_default_type_with_explicit_container_type(self):
        self._check_digest_default_types('msg_28.txt')

    def test_default_type_non_parsed(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # Set up container
        container = MIMEMultipart('digest', 'BOUNDARY')
        container.epilogue = ''
        # Set up subparts
        subpart1a = MIMEText('message 1\n')
        subpart2a = MIMEText('message 2\n')
        subpart1 = MIMEMessage(subpart1a)
        subpart2 = MIMEMessage(subpart2a)
        container.attach(subpart1)
        container.attach(subpart2)
        for part in (subpart1, subpart2):
            eq(part.get_content_type(), 'message/rfc822')
            eq(part.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
        # Strip the explicit type headers from the wrappers; the reported
        # types must not change, but the headers vanish from the output.
        del subpart1['content-type']
        del subpart1['mime-version']
        del subpart2['content-type']
        del subpart2['mime-version']
        for part in (subpart1, subpart2):
            eq(part.get_content_type(), 'message/rfc822')
            eq(part.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')

    def test_mime_attachments_in_constructor(self):
        # Subparts handed to the constructor become the payload, in order.
        eq = self.assertEqual
        text1 = MIMEText('')
        text2 = MIMEText('')
        msg = MIMEMultipart(_subparts=(text1, text2))
        eq(len(msg.get_payload()), 2)
        eq(msg.get_payload(0), text1)
        eq(msg.get_payload(1), text2)

    def test_default_multipart_constructor(self):
        # A MIMEMultipart built with no arguments is already multipart.
        msg = MIMEMultipart()
        self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002542
Ezio Melottib3aedd42010-11-20 19:04:17 +00002543
# A general test of parser->model->generator idempotency.  IOW, read a message
# in, parse it into a message object tree, then without touching the tree,
# regenerate the plain text.  The original text and the transformed text
# should be identical.  Note that we ignore the Unix-From since that may
# contain a changed date.
class TestIdempotent(TestEmailBase):
    """Parse sample messages and check they regenerate to identical text."""

    # Line separator the samples are expected to use; referenced by the
    # payload/preamble/epilogue assertions below.
    linesep = '\n'

    def _msgobj(self, filename):
        # Return (parsed message, raw text) for the named sample file.
        with openfile(filename) as fp:
            data = fp.read()
        return email.message_from_string(data), data

    def _idempotent(self, msg, text, unixfrom=False):
        # Flatten *msg* without header wrapping (maxheaderlen=0) and compare
        # the result with the original *text*.
        out = StringIO()
        Generator(out, maxheaderlen=0).flatten(msg, unixfrom=unixfrom)
        self.ndiffAssertEqual(text, out.getvalue())

    def test_parse_text_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_01.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_params()[1], ('charset', 'us-ascii'))
        eq(msg.get_param('charset'), 'us-ascii')
        self.assertIsNone(msg.preamble)
        self.assertIsNone(msg.epilogue)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_03.txt')
        eq(msg.get_content_type(), 'text/plain')
        self.assertIsNone(msg.get_params())
        self.assertIsNone(msg.get_param('charset'))
        self._idempotent(msg, text)

    def test_simple_multipart(self):
        self._idempotent(*self._msgobj('msg_04.txt'))

    def test_MIME_digest(self):
        self._idempotent(*self._msgobj('msg_02.txt'))

    def test_long_header(self):
        self._idempotent(*self._msgobj('msg_27.txt'))

    def test_MIME_digest_with_part_headers(self):
        self._idempotent(*self._msgobj('msg_28.txt'))

    def test_mixed_with_image(self):
        self._idempotent(*self._msgobj('msg_06.txt'))

    def test_multipart_report(self):
        self._idempotent(*self._msgobj('msg_05.txt'))

    def test_dsn(self):
        self._idempotent(*self._msgobj('msg_16.txt'))

    def test_preamble_epilogue(self):
        self._idempotent(*self._msgobj('msg_21.txt'))

    def test_multipart_one_part(self):
        self._idempotent(*self._msgobj('msg_23.txt'))

    def test_multipart_no_parts(self):
        self._idempotent(*self._msgobj('msg_24.txt'))

    def test_no_start_boundary(self):
        self._idempotent(*self._msgobj('msg_31.txt'))

    def test_rfc2231_charset(self):
        self._idempotent(*self._msgobj('msg_32.txt'))

    def test_more_rfc2231_parameters(self):
        self._idempotent(*self._msgobj('msg_33.txt'))

    def test_text_plain_in_a_multipart_digest(self):
        self._idempotent(*self._msgobj('msg_34.txt'))

    def test_nested_multipart_mixeds(self):
        self._idempotent(*self._msgobj('msg_12a.txt'))

    def test_message_external_body_idempotent(self):
        self._idempotent(*self._msgobj('msg_36.txt'))

    def test_message_delivery_status(self):
        # The only round trip here that also emits the Unix-From line.
        self._idempotent(*self._msgobj('msg_43.txt'), unixfrom=True)

    def test_message_signed_idempotent(self):
        self._idempotent(*self._msgobj('msg_45.txt'))

    def test_content_type(self):
        eq = self.assertEqual
        msg, _ = self._msgobj('msg_05.txt')
        eq(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(msg.get_params())
        eq(params['report-type'], 'delivery-status')
        eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep)
        eq(msg.epilogue, self.linesep)
        eq(len(msg.get_payload()), 3)
        # Make sure the subparts are what we expect
        msg1 = msg.get_payload(0)
        eq(msg1.get_content_type(), 'text/plain')
        eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg2 = msg.get_payload(1)
        eq(msg2.get_content_type(), 'text/plain')
        eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg3 = msg.get_payload(2)
        eq(msg3.get_content_type(), 'message/rfc822')
        self.assertIsInstance(msg3, Message)
        payload = msg3.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg4 = payload[0]
        self.assertIsInstance(msg4, Message)
        eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep)

    def test_parser(self):
        eq = self.assertEqual
        msg, _ = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        eq(msg.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and that
        # that submessage has a type of text/plain
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg1 = payload[0]
        self.assertIsInstance(msg1, Message)
        eq(msg1.get_content_type(), 'text/plain')
        self.assertIsInstance(msg1.get_payload(), str)
        eq(msg1.get_payload(), self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002706
2707
Ezio Melottib3aedd42010-11-20 19:04:17 +00002708
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002709# Test various other bits of the package's functionality
2710class TestMiscellaneous(TestEmailBase):
def test_message_from_string(self):
    # Parse the sample from a string and regenerate it; the output must
    # equal the input text.
    with openfile('msg_01.txt') as fp:
        original = fp.read()
    parsed = email.message_from_string(original)
    out = StringIO()
    # Don't wrap/continue long headers since we're trying to test
    # idempotency.
    Generator(out, maxheaderlen=0).flatten(parsed)
    self.assertEqual(original, out.getvalue())
2721
def test_message_from_file(self):
    # Same round trip as the string variant, but parsing from the open file.
    with openfile('msg_01.txt') as fp:
        original = fp.read()
        # Rewind so the parser sees the file from the beginning.
        fp.seek(0)
        parsed = email.message_from_file(fp)
    out = StringIO()
    # Don't wrap/continue long headers since we're trying to test
    # idempotency.
    Generator(out, maxheaderlen=0).flatten(parsed)
    self.assertEqual(original, out.getvalue())
2733
def test_message_from_string_with_class(self):
    # message_from_string() must build the tree from the supplied class.
    with openfile('msg_01.txt') as fp:
        text = fp.read()

    # Create a subclass
    class MyMessage(Message):
        pass

    msg = email.message_from_string(text, MyMessage)
    # assertIsInstance reports the offending type on failure, unlike
    # assertTrue(isinstance(...)).
    self.assertIsInstance(msg, MyMessage)
    # Try something more complicated
    with openfile('msg_02.txt') as fp:
        text = fp.read()
    msg = email.message_from_string(text, MyMessage)
    for subpart in msg.walk():
        self.assertIsInstance(subpart, MyMessage)
2751
def test_message_from_file_with_class(self):
    # message_from_file() must build the tree from the supplied class.

    # Create a subclass
    class MyMessage(Message):
        pass

    with openfile('msg_01.txt') as fp:
        msg = email.message_from_file(fp, MyMessage)
    # assertIsInstance reports the offending type on failure, unlike
    # assertTrue(isinstance(...)).
    self.assertIsInstance(msg, MyMessage)
    # Try something more complicated
    with openfile('msg_02.txt') as fp:
        msg = email.message_from_file(fp, MyMessage)
    for subpart in msg.walk():
        self.assertIsInstance(subpart, MyMessage)
2766
def test_custom_message_does_not_require_arguments(self):
    # A Message subclass whose __init__ accepts no arguments must still be
    # usable as the message class.
    class MyMessage(Message):
        def __init__(self):
            super().__init__()

    msg = self._str_msg("Subject: test\n\ntest", MyMessage)
    # assertIsInstance reports the offending type on failure, unlike
    # assertTrue(isinstance(...)).
    self.assertIsInstance(msg, MyMessage)
2773
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002774 def test__all__(self):
2775 module = __import__('email')
R David Murray1b6c7242012-03-16 22:43:05 -04002776 self.assertEqual(sorted(module.__all__), [
2777 'base64mime', 'charset', 'encoders', 'errors', 'feedparser',
2778 'generator', 'header', 'iterators', 'message',
2779 'message_from_binary_file', 'message_from_bytes',
2780 'message_from_file', 'message_from_string', 'mime', 'parser',
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002781 'quoprimime', 'utils',
2782 ])
2783
2784 def test_formatdate(self):
2785 now = time.time()
2786 self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
2787 time.gmtime(now)[:6])
2788
2789 def test_formatdate_localtime(self):
2790 now = time.time()
2791 self.assertEqual(
2792 utils.parsedate(utils.formatdate(now, localtime=True))[:6],
2793 time.localtime(now)[:6])
2794
2795 def test_formatdate_usegmt(self):
2796 now = time.time()
2797 self.assertEqual(
2798 utils.formatdate(now, localtime=False),
2799 time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
2800 self.assertEqual(
2801 utils.formatdate(now, localtime=False, usegmt=True),
2802 time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
2803
Georg Brandl1aca31e2012-09-22 09:03:56 +02002804 # parsedate and parsedate_tz will become deprecated interfaces someday
2805 def test_parsedate_returns_None_for_invalid_strings(self):
2806 self.assertIsNone(utils.parsedate(''))
2807 self.assertIsNone(utils.parsedate_tz(''))
2808 self.assertIsNone(utils.parsedate('0'))
2809 self.assertIsNone(utils.parsedate_tz('0'))
2810 self.assertIsNone(utils.parsedate('A Complete Waste of Time'))
2811 self.assertIsNone(utils.parsedate_tz('A Complete Waste of Time'))
2812 # Not a part of the spec but, but this has historically worked:
2813 self.assertIsNone(utils.parsedate(None))
2814 self.assertIsNone(utils.parsedate_tz(None))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002815
2816 def test_parsedate_compact(self):
2817 # The FWS after the comma is optional
2818 self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
2819 utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
2820
2821 def test_parsedate_no_dayofweek(self):
2822 eq = self.assertEqual
2823 eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
2824 (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
2825
2826 def test_parsedate_compact_no_dayofweek(self):
2827 eq = self.assertEqual
2828 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
2829 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2830
R. David Murray4a62e892010-12-23 20:35:46 +00002831 def test_parsedate_no_space_before_positive_offset(self):
2832 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
2833 (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
2834
2835 def test_parsedate_no_space_before_negative_offset(self):
2836 # Issue 1155362: we already handled '+' for this case.
2837 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
2838 (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))
2839
2840
R David Murrayaccd1c02011-03-13 20:06:23 -04002841 def test_parsedate_accepts_time_with_dots(self):
2842 eq = self.assertEqual
2843 eq(utils.parsedate_tz('5 Feb 2003 13.47.26 -0800'),
2844 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2845 eq(utils.parsedate_tz('5 Feb 2003 13.47 -0800'),
2846 (2003, 2, 5, 13, 47, 0, 0, 1, -1, -28800))
2847
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002848 def test_parsedate_acceptable_to_time_functions(self):
2849 eq = self.assertEqual
2850 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
2851 t = int(time.mktime(timetup))
2852 eq(time.localtime(t)[:6], timetup[:6])
2853 eq(int(time.strftime('%Y', timetup)), 2003)
2854 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
2855 t = int(time.mktime(timetup[:9]))
2856 eq(time.localtime(t)[:6], timetup[:6])
2857 eq(int(time.strftime('%Y', timetup[:9])), 2003)
2858
Alexander Belopolskya07548e2012-06-21 20:34:09 -04002859 def test_mktime_tz(self):
2860 self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
2861 -1, -1, -1, 0)), 0)
2862 self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
2863 -1, -1, -1, 1234)), -1234)
2864
R. David Murray219d1c82010-08-25 00:45:55 +00002865 def test_parsedate_y2k(self):
2866 """Test for parsing a date with a two-digit year.
2867
2868 Parsing a date with a two-digit year should return the correct
2869 four-digit year. RFC822 allows two-digit years, but RFC2822 (which
2870 obsoletes RFC822) requires four-digit years.
2871
2872 """
2873 self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
2874 utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
2875 self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
2876 utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
2877
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002878 def test_parseaddr_empty(self):
2879 self.assertEqual(utils.parseaddr('<>'), ('', ''))
2880 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
2881
2882 def test_noquote_dump(self):
2883 self.assertEqual(
2884 utils.formataddr(('A Silly Person', 'person@dom.ain')),
2885 'A Silly Person <person@dom.ain>')
2886
2887 def test_escape_dump(self):
2888 self.assertEqual(
2889 utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
R David Murrayb53319f2012-03-14 15:31:47 -04002890 r'"A (Very) Silly Person" <person@dom.ain>')
2891 self.assertEqual(
2892 utils.parseaddr(r'"A \(Very\) Silly Person" <person@dom.ain>'),
2893 ('A (Very) Silly Person', 'person@dom.ain'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002894 a = r'A \(Special\) Person'
2895 b = 'person@dom.ain'
2896 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2897
2898 def test_escape_backslashes(self):
2899 self.assertEqual(
2900 utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
2901 r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
2902 a = r'Arthur \Backslash\ Foobar'
2903 b = 'person@dom.ain'
2904 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2905
R David Murray8debacb2011-04-06 09:35:57 -04002906 def test_quotes_unicode_names(self):
2907 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2908 name = "H\u00e4ns W\u00fcrst"
2909 addr = 'person@dom.ain'
2910 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
2911 latin1_quopri = "=?iso-8859-1?q?H=E4ns_W=FCrst?= <person@dom.ain>"
2912 self.assertEqual(utils.formataddr((name, addr)), utf8_base64)
2913 self.assertEqual(utils.formataddr((name, addr), 'iso-8859-1'),
2914 latin1_quopri)
2915
2916 def test_accepts_any_charset_like_object(self):
2917 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2918 name = "H\u00e4ns W\u00fcrst"
2919 addr = 'person@dom.ain'
2920 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
2921 foobar = "FOOBAR"
2922 class CharsetMock:
2923 def header_encode(self, string):
2924 return foobar
2925 mock = CharsetMock()
2926 mock_expected = "%s <%s>" % (foobar, addr)
2927 self.assertEqual(utils.formataddr((name, addr), mock), mock_expected)
2928 self.assertEqual(utils.formataddr((name, addr), Charset('utf-8')),
2929 utf8_base64)
2930
2931 def test_invalid_charset_like_object_raises_error(self):
2932 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2933 name = "H\u00e4ns W\u00fcrst"
2934 addr = 'person@dom.ain'
2935 # A object without a header_encode method:
2936 bad_charset = object()
2937 self.assertRaises(AttributeError, utils.formataddr, (name, addr),
2938 bad_charset)
2939
2940 def test_unicode_address_raises_error(self):
2941 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2942 addr = 'pers\u00f6n@dom.in'
2943 self.assertRaises(UnicodeError, utils.formataddr, (None, addr))
2944 self.assertRaises(UnicodeError, utils.formataddr, ("Name", addr))
2945
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002946 def test_name_with_dot(self):
2947 x = 'John X. Doe <jxd@example.com>'
2948 y = '"John X. Doe" <jxd@example.com>'
2949 a, b = ('John X. Doe', 'jxd@example.com')
2950 self.assertEqual(utils.parseaddr(x), (a, b))
2951 self.assertEqual(utils.parseaddr(y), (a, b))
2952 # formataddr() quotes the name if there's a dot in it
2953 self.assertEqual(utils.formataddr((a, b)), y)
2954
R. David Murray5397e862010-10-02 15:58:26 +00002955 def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
2956 # issue 10005. Note that in the third test the second pair of
2957 # backslashes is not actually a quoted pair because it is not inside a
2958 # comment or quoted string: the address being parsed has a quoted
2959 # string containing a quoted backslash, followed by 'example' and two
2960 # backslashes, followed by another quoted string containing a space and
2961 # the word 'example'. parseaddr copies those two backslashes
2962 # literally. Per rfc5322 this is not technically correct since a \ may
2963 # not appear in an address outside of a quoted string. It is probably
2964 # a sensible Postel interpretation, though.
2965 eq = self.assertEqual
2966 eq(utils.parseaddr('""example" example"@example.com'),
2967 ('', '""example" example"@example.com'))
2968 eq(utils.parseaddr('"\\"example\\" example"@example.com'),
2969 ('', '"\\"example\\" example"@example.com'))
2970 eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
2971 ('', '"\\\\"example\\\\" example"@example.com'))
2972
R. David Murray63563cd2010-12-18 18:25:38 +00002973 def test_parseaddr_preserves_spaces_in_local_part(self):
2974 # issue 9286. A normal RFC5322 local part should not contain any
2975 # folding white space, but legacy local parts can (they are a sequence
2976 # of atoms, not dotatoms). On the other hand we strip whitespace from
2977 # before the @ and around dots, on the assumption that the whitespace
2978 # around the punctuation is a mistake in what would otherwise be
2979 # an RFC5322 local part. Leading whitespace is, usual, stripped as well.
2980 self.assertEqual(('', "merwok wok@xample.com"),
2981 utils.parseaddr("merwok wok@xample.com"))
2982 self.assertEqual(('', "merwok wok@xample.com"),
2983 utils.parseaddr("merwok wok@xample.com"))
2984 self.assertEqual(('', "merwok wok@xample.com"),
2985 utils.parseaddr(" merwok wok @xample.com"))
2986 self.assertEqual(('', 'merwok"wok" wok@xample.com'),
2987 utils.parseaddr('merwok"wok" wok@xample.com'))
2988 self.assertEqual(('', 'merwok.wok.wok@xample.com'),
2989 utils.parseaddr('merwok. wok . wok@xample.com'))
2990
R David Murrayb53319f2012-03-14 15:31:47 -04002991 def test_formataddr_does_not_quote_parens_in_quoted_string(self):
2992 addr = ("'foo@example.com' (foo@example.com)",
2993 'foo@example.com')
2994 addrstr = ('"\'foo@example.com\' '
2995 '(foo@example.com)" <foo@example.com>')
2996 self.assertEqual(utils.parseaddr(addrstr), addr)
2997 self.assertEqual(utils.formataddr(addr), addrstr)
2998
2999
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003000 def test_multiline_from_comment(self):
3001 x = """\
3002Foo
3003\tBar <foo@example.com>"""
3004 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
3005
3006 def test_quote_dump(self):
3007 self.assertEqual(
3008 utils.formataddr(('A Silly; Person', 'person@dom.ain')),
3009 r'"A Silly; Person" <person@dom.ain>')
3010
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003011 def test_charset_richcomparisons(self):
3012 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003013 ne = self.assertNotEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003014 cset1 = Charset()
3015 cset2 = Charset()
3016 eq(cset1, 'us-ascii')
3017 eq(cset1, 'US-ASCII')
3018 eq(cset1, 'Us-AsCiI')
3019 eq('us-ascii', cset1)
3020 eq('US-ASCII', cset1)
3021 eq('Us-AsCiI', cset1)
3022 ne(cset1, 'usascii')
3023 ne(cset1, 'USASCII')
3024 ne(cset1, 'UsAsCiI')
3025 ne('usascii', cset1)
3026 ne('USASCII', cset1)
3027 ne('UsAsCiI', cset1)
3028 eq(cset1, cset2)
3029 eq(cset2, cset1)
3030
3031 def test_getaddresses(self):
3032 eq = self.assertEqual
3033 eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
3034 'Bud Person <bperson@dom.ain>']),
3035 [('Al Person', 'aperson@dom.ain'),
3036 ('Bud Person', 'bperson@dom.ain')])
3037
3038 def test_getaddresses_nasty(self):
3039 eq = self.assertEqual
3040 eq(utils.getaddresses(['foo: ;']), [('', '')])
3041 eq(utils.getaddresses(
3042 ['[]*-- =~$']),
3043 [('', ''), ('', ''), ('', '*--')])
3044 eq(utils.getaddresses(
3045 ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
3046 [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
3047
3048 def test_getaddresses_embedded_comment(self):
3049 """Test proper handling of a nested comment"""
3050 eq = self.assertEqual
3051 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
3052 eq(addrs[0][1], 'foo@bar.com')
3053
3054 def test_utils_quote_unquote(self):
3055 eq = self.assertEqual
3056 msg = Message()
3057 msg.add_header('content-disposition', 'attachment',
3058 filename='foo\\wacky"name')
3059 eq(msg.get_filename(), 'foo\\wacky"name')
3060
3061 def test_get_body_encoding_with_bogus_charset(self):
3062 charset = Charset('not a charset')
3063 self.assertEqual(charset.get_body_encoding(), 'base64')
3064
3065 def test_get_body_encoding_with_uppercase_charset(self):
3066 eq = self.assertEqual
3067 msg = Message()
3068 msg['Content-Type'] = 'text/plain; charset=UTF-8'
3069 eq(msg['content-type'], 'text/plain; charset=UTF-8')
3070 charsets = msg.get_charsets()
3071 eq(len(charsets), 1)
3072 eq(charsets[0], 'utf-8')
3073 charset = Charset(charsets[0])
3074 eq(charset.get_body_encoding(), 'base64')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00003075 msg.set_payload(b'hello world', charset=charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003076 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
3077 eq(msg.get_payload(decode=True), b'hello world')
3078 eq(msg['content-transfer-encoding'], 'base64')
3079 # Try another one
3080 msg = Message()
3081 msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
3082 charsets = msg.get_charsets()
3083 eq(len(charsets), 1)
3084 eq(charsets[0], 'us-ascii')
3085 charset = Charset(charsets[0])
3086 eq(charset.get_body_encoding(), encoders.encode_7or8bit)
3087 msg.set_payload('hello world', charset=charset)
3088 eq(msg.get_payload(), 'hello world')
3089 eq(msg['content-transfer-encoding'], '7bit')
3090
3091 def test_charsets_case_insensitive(self):
3092 lc = Charset('us-ascii')
3093 uc = Charset('US-ASCII')
3094 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
3095
def test_partial_falls_inside_message_delivery_status(self):
    # The Parser interface provides chunks of data to FeedParser in 8192
    # byte gulps.  SF bug #1076485 found one of those chunks inside
    # message/delivery-status header block, which triggered an
    # unreadline() of NeedMoreData.
    msg = self._msgobj('msg_43.txt')
    sfp = StringIO()
    iterators._structure(msg, sfp)
    # _structure() writes one content type per line and indents every
    # nesting level by four spaces, so the expected dump is spelled with
    # explicit indentation instead of relying on the layout of a
    # triple-quoted literal.
    # NOTE(review): the 26 text/plain entries are assumed to be the
    # children of message/delivery-status -- confirm against msg_43.txt.
    expected = (
        'multipart/report\n'
        '    text/plain\n'
        '    message/delivery-status\n'
        + '        text/plain\n' * 26
        + '    text/rfc822-headers\n'
    )
    self.ndiffAssertEqual(sfp.getvalue(), expected)
3137
R. David Murraya0b44b52010-12-02 21:47:19 +00003138 def test_make_msgid_domain(self):
3139 self.assertEqual(
3140 email.utils.make_msgid(domain='testdomain-string')[-19:],
3141 '@testdomain-string>')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003142
def test_Generator_linend(self):
    # Issue 14645.
    # newline='\n' stops the file layer from translating line endings, so
    # the CRLFs stored in the sample reach the parser untouched.
    with openfile('msg_26.txt', newline='\n') as fp:
        crlf_text = fp.read()
    lf_text = crlf_text.replace('\r\n', '\n')
    parsed = email.message_from_string(crlf_text)
    sink = StringIO()
    email.generator.Generator(sink).flatten(parsed)
    # Flattening without an explicit linesep must give the LF-only text.
    self.assertEqual(sink.getvalue(), lf_text)
3153
def test_BytesGenerator_linend(self):
    # Issue 14645.
    with openfile('msg_26.txt', newline='\n') as fp:
        crlf_text = fp.read()
    lf_text = crlf_text.replace('\r\n', '\n')
    # Parse the LF-only variant, then ask for CRLF on output: the result
    # must match the text as it was read from the file.
    parsed = email.message_from_string(lf_text)
    sink = BytesIO()
    email.generator.BytesGenerator(sink).flatten(parsed, linesep='\r\n')
    self.assertEqual(sink.getvalue().decode('ascii'), crlf_text)
3164
def test_BytesGenerator_linend_with_non_ascii(self):
    # Issue 14645.
    with openfile('msg_26.txt', 'rb') as fp:
        raw = fp.read()
    # Plant a non-ASCII byte in the text before round-tripping it.
    crlf_bytes = raw.replace(b'with attachment', b'fo\xf6')
    lf_bytes = crlf_bytes.replace(b'\r\n', b'\n')
    parsed = email.message_from_bytes(lf_bytes)
    sink = BytesIO()
    email.generator.BytesGenerator(sink).flatten(parsed, linesep='\r\n')
    self.assertEqual(sink.getvalue(), crlf_bytes)
3176
Ezio Melottib3aedd42010-11-20 19:04:17 +00003177
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003178# Test the iterator/generators
class TestIterators(TestEmailBase):
    # Exercises the helpers in email.iterators plus the line buffering of
    # email.feedparser.BufferedSubFile.

    def test_body_line_iterator(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # First a simple non-multipart message
        msg = self._msgobj('msg_01.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 6)
        neq(EMPTYSTRING.join(lines), msg.get_payload())
        # Now a more complicated multipart
        msg = self._msgobj('msg_02.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 43)
        with openfile('msg_19.txt') as fp:
            neq(EMPTYSTRING.join(lines), fp.read())

    def test_typed_subpart_iterator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_04.txt')
        # Collect the subparts whose main type is 'text'.
        subparts = list(iterators.typed_subpart_iterator(msg, 'text'))
        eq(len(subparts), 2)
        payloads = [subpart.get_payload() for subpart in subparts]
        eq(EMPTYSTRING.join(payloads), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_03.txt')
        # Here both the main type and the subtype are given explicitly.
        subparts = list(
            iterators.typed_subpart_iterator(msg, 'text', 'plain'))
        eq(len(subparts), 1)
        payloads = [subpart.get_payload() for subpart in subparts]
        eq(EMPTYSTRING.join(payloads), """\

Hi,

Do you like this message?

-Me
""")

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
           line endings.  A CR ending one push() followed by a LF starting
           the next push() added an empty line.
        '''
        # (chunk to push, number of lines readable right after that push)
        imt = [
            ("a\r \n", 2),
            ("b", 0),
            ("c\n", 1),
            ("", 0),
            ("d\r\n", 1),
            ("e\r", 0),
            ("\nf", 1),
            ("\r\n", 1),
        ]
        from email.feedparser import BufferedSubFile, NeedMoreData
        bsf = BufferedSubFile()
        om = []
        nt = 0
        for il, n in imt:
            bsf.push(il)
            nt += n
            # Drain every line that is available now; readline() hands
            # back the NeedMoreData sentinel once the buffer is exhausted.
            drained = list(iter(bsf.readline, NeedMoreData))
            om.extend(drained)
            # assertEqual (not assertTrue(a == b)) so that a failure
            # reports both counts.
            self.assertEqual(n, len(drained))
        self.assertEqual(len(om), nt)
        # Nothing may be lost or invented: the output lines concatenate
        # back to exactly what was pushed.
        self.assertEqual(''.join(il for il, n in imt), ''.join(om))
3265
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003266
Ezio Melottib3aedd42010-11-20 19:04:17 +00003267
class TestParsers(TestEmailBase):

    # Let unittest print complete diffs when a message comparison fails.
    maxDiff = None

    def test_header_parser(self):
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt') as fp:
            msg = HeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        # The body was not parsed, so it is still one plain string.
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)

    def test_bytes_header_parser(self):
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt', 'rb') as fp:
            msg = email.parser.BytesHeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)
        self.assertIsInstance(msg.get_payload(decode=True), bytes)

    def test_whitespace_continuation(self):
        eq = self.assertEqual
        # This message contains a line after the Subject: header that has only
        # whitespace, but it is not empty!
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: the next line has a space on it
\x20
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        eq = self.assertEqual
        # Like the previous test, but the subject line is the last
        # header.
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam
Subject: the next line has a space on it
\x20

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        eq = self.assertEqual
        # newline='\n' keeps the CRLFs of the sample file intact.
        with openfile('msg_26.txt', newline='\n') as fp:
            msg = Parser().parse(fp)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'text/plain')
        eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'application/riscos')

    def test_crlf_flatten(self):
        # Using newline='\n' preserves the crlfs in this input file.
        with openfile('msg_26.txt', newline='\n') as fp:
            text = fp.read()
        msg = email.message_from_string(text)
        s = StringIO()
        g = Generator(s)
        g.flatten(msg, linesep='\r\n')
        self.assertEqual(s.getvalue(), text)

    def test_multipart_digest_with_extra_mime_headers(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'message/rfc822')
        self.assertTrue(part1.is_multipart())
        eq(len(part1.get_payload()), 1)
        part1a = part1.get_payload(0)
        self.assertFalse(part1a.is_multipart())
        eq(part1a.get_content_type(), 'text/plain')
        neq(part1a.get_payload(), 'message 1\n')
        # next message/rfc822
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'message/rfc822')
        self.assertTrue(part2.is_multipart())
        eq(len(part2.get_payload()), 1)
        part2a = part2.get_payload(0)
        self.assertFalse(part2a.is_multipart())
        eq(part2a.get_content_type(), 'text/plain')
        neq(part2a.get_payload(), 'message 2\n')

    def test_three_lines(self):
        # A bug report by Andrew McNamara
        # Three header lines and no body; the first header is missing its
        # closing '>'.
        lines = ['From: Andrew Person <aperson@dom.ain',
                 'Subject: Test',
                 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
        msg = email.message_from_string(NL.join(lines))
        self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        eq = self.assertEqual
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        value1 = 'text'
        value2 = 'more text'
        m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            value1, value2)
        msg = email.message_from_string(m)
        eq(msg.get('Header'), value1)
        eq(msg.get('Next-Header'), value2)

    def test_rfc2822_header_syntax(self):
        eq = self.assertEqual
        # Punctuation-heavy field names must still be accepted as headers.
        m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg), 3)
        eq(sorted(field for field in msg), ['!"#QUX;~', '>From', 'From'])
        eq(msg.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        eq = self.assertEqual
        # The first line contains a space before any colon, so no header
        # at all is expected to be parsed from this input.
        m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg.keys()), 0)

    def test_rfc2822_one_character_header(self):
        eq = self.assertEqual
        m = 'A: first header\nB: second header\nCC: third header\n\nbody'
        msg = email.message_from_string(m)
        # sorted() instead of an in-place sort of the keys() result.
        eq(sorted(msg.keys()), ['A', 'B', 'CC'])
        eq(msg.get_payload(), 'body')

    def test_CRLFLF_at_end_of_part(self):
        # issue 5610: feedparser should not eat two chars from body part ending
        # with "\r\n\n".
        m = (
            "From: foo@bar.com\n"
            "To: baz\n"
            "Mime-Version: 1.0\n"
            "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
            "\n"
            "--BOUNDARY\n"
            "Content-Type: text/plain\n"
            "\n"
            "body ending with CRLF newline\r\n"
            "\n"
            "--BOUNDARY--\n"
        )
        msg = email.message_from_string(m)
        self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003443
Ezio Melottib3aedd42010-11-20 19:04:17 +00003444
R. David Murray96fd54e2010-10-08 15:55:28 +00003445class Test8BitBytesHandling(unittest.TestCase):
3446 # In Python3 all input is string, but that doesn't work if the actual input
3447 # uses an 8bit transfer encoding. To hack around that, in email 5.1 we
3448 # decode byte streams using the surrogateescape error handler, and
3449 # reconvert to binary at appropriate places if we detect surrogates. This
3450 # doesn't allow us to transform headers with 8bit bytes (they get munged),
3451 # but it does allow us to parse and preserve them, and to decode body
3452 # parts that use an 8bit CTE.
3453
# Template of a minimal single-part message.  The tests below fill in
# {charset}, {cte} and {bodyline} with str.format() and encode the result
# to bytes before parsing it.
bodytest_msg = (
    'From: foo@bar.com\n'
    'To: baz\n'
    'Mime-Version: 1.0\n'
    'Content-Type: text/plain; charset={charset}\n'
    'Content-Transfer-Encoding: {cte}\n'
    '\n'
    '{bodyline}\n'
)
3463
3464 def test_known_8bit_CTE(self):
3465 m = self.bodytest_msg.format(charset='utf-8',
3466 cte='8bit',
3467 bodyline='pöstal').encode('utf-8')
3468 msg = email.message_from_bytes(m)
3469 self.assertEqual(msg.get_payload(), "pöstal\n")
3470 self.assertEqual(msg.get_payload(decode=True),
3471 "pöstal\n".encode('utf-8'))
3472
3473 def test_unknown_8bit_CTE(self):
3474 m = self.bodytest_msg.format(charset='notavalidcharset',
3475 cte='8bit',
3476 bodyline='pöstal').encode('utf-8')
3477 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00003478 self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n")
R. David Murray96fd54e2010-10-08 15:55:28 +00003479 self.assertEqual(msg.get_payload(decode=True),
3480 "pöstal\n".encode('utf-8'))
3481
3482 def test_8bit_in_quopri_body(self):
3483 # This is non-RFC compliant data...without 'decode' the library code
3484 # decodes the body using the charset from the headers, and because the
3485 # source byte really is utf-8 this works. This is likely to fail
3486 # against real dirty data (ie: produce mojibake), but the data is
3487 # invalid anyway so it is as good a guess as any. But this means that
3488 # this test just confirms the current behavior; that behavior is not
3489 # necessarily the best possible behavior. With 'decode' it is
3490 # returning the raw bytes, so that test should be of correct behavior,
3491 # or at least produce the same result that email4 did.
3492 m = self.bodytest_msg.format(charset='utf-8',
3493 cte='quoted-printable',
3494 bodyline='p=C3=B6stál').encode('utf-8')
3495 msg = email.message_from_bytes(m)
3496 self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
3497 self.assertEqual(msg.get_payload(decode=True),
3498 'pöstál\n'.encode('utf-8'))
3499
3500 def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
3501 # This is similar to the previous test, but proves that if the 8bit
3502 # byte is undecodeable in the specified charset, it gets replaced
3503 # by the unicode 'unknown' character. Again, this may or may not
3504 # be the ideal behavior. Note that if decode=False none of the
3505 # decoders will get involved, so this is the only test we need
3506 # for this behavior.
3507 m = self.bodytest_msg.format(charset='ascii',
3508 cte='quoted-printable',
3509 bodyline='p=C3=B6stál').encode('utf-8')
3510 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00003511 self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
R. David Murray96fd54e2010-10-08 15:55:28 +00003512 self.assertEqual(msg.get_payload(decode=True),
3513 'pöstál\n'.encode('utf-8'))
3514
R David Murray80e0aee2012-05-27 21:23:34 -04003515 # test_defect_handling:test_invalid_chars_in_base64_payload
R. David Murray96fd54e2010-10-08 15:55:28 +00003516 def test_8bit_in_base64_body(self):
R David Murray80e0aee2012-05-27 21:23:34 -04003517 # If we get 8bit bytes in a base64 body, we can just ignore them
3518 # as being outside the base64 alphabet and decode anyway. But
3519 # we register a defect.
R. David Murray96fd54e2010-10-08 15:55:28 +00003520 m = self.bodytest_msg.format(charset='utf-8',
3521 cte='base64',
3522 bodyline='cMO2c3RhbAá=').encode('utf-8')
3523 msg = email.message_from_bytes(m)
3524 self.assertEqual(msg.get_payload(decode=True),
R David Murray80e0aee2012-05-27 21:23:34 -04003525 'pöstal'.encode('utf-8'))
3526 self.assertIsInstance(msg.defects[0],
3527 errors.InvalidBase64CharactersDefect)
R. David Murray96fd54e2010-10-08 15:55:28 +00003528
3529 def test_8bit_in_uuencode_body(self):
3530 # Sticking an 8bit byte in a uuencode block makes it undecodable by
3531 # normal means, so the block is returned undecoded, but as bytes.
3532 m = self.bodytest_msg.format(charset='utf-8',
3533 cte='uuencode',
3534 bodyline='<,.V<W1A; á ').encode('utf-8')
3535 msg = email.message_from_bytes(m)
3536 self.assertEqual(msg.get_payload(decode=True),
3537 '<,.V<W1A; á \n'.encode('utf-8'))
3538
3539
# Each entry pairs a raw header line with (field name, value expected
# when the parsed message is rendered with str()).
headertest_headers = (
    ('From: foo@bar.com',
     ('From', 'foo@bar.com')),
    ('To: báz',
     ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
    ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
     '\tJean de Baddie',
     ('Subject',
      '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
      'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
      ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
    ('From: göst',
     ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
)
# The raw lines joined into one utf-8 encoded message.  Note that the
# body line follows the headers directly, with no blank line in between.
headertest_msg = (
    '\n'.join(raw_line for raw_line, _ in headertest_headers)
    + '\nYes, they are flying.\n'
).encode('utf-8')
R. David Murray96fd54e2010-10-08 15:55:28 +00003552
3553 def test_get_8bit_header(self):
3554 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003555 self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz')
3556 self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz')
R. David Murray96fd54e2010-10-08 15:55:28 +00003557
3558 def test_print_8bit_headers(self):
3559 msg = email.message_from_bytes(self.headertest_msg)
3560 self.assertEqual(str(msg),
R. David Murray92532142011-01-07 23:25:30 +00003561 textwrap.dedent("""\
3562 From: {}
3563 To: {}
3564 Subject: {}
3565 From: {}
3566
3567 Yes, they are flying.
3568 """).format(*[expected[1] for (_, expected) in
3569 self.headertest_headers]))
R. David Murray96fd54e2010-10-08 15:55:28 +00003570
3571 def test_values_with_8bit_headers(self):
3572 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003573 self.assertListEqual([str(x) for x in msg.values()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003574 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003575 'b\uFFFD\uFFFDz',
3576 'Maintenant je vous pr\uFFFD\uFFFDsente mon '
3577 'coll\uFFFD\uFFFDgue, le pouf '
3578 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
R. David Murray96fd54e2010-10-08 15:55:28 +00003579 '\tJean de Baddie',
R. David Murray92532142011-01-07 23:25:30 +00003580 "g\uFFFD\uFFFDst"])
R. David Murray96fd54e2010-10-08 15:55:28 +00003581
3582 def test_items_with_8bit_headers(self):
3583 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003584 self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003585 [('From', 'foo@bar.com'),
R. David Murray92532142011-01-07 23:25:30 +00003586 ('To', 'b\uFFFD\uFFFDz'),
3587 ('Subject', 'Maintenant je vous '
3588 'pr\uFFFD\uFFFDsente '
3589 'mon coll\uFFFD\uFFFDgue, le pouf '
3590 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
3591 '\tJean de Baddie'),
3592 ('From', 'g\uFFFD\uFFFDst')])
R. David Murray96fd54e2010-10-08 15:55:28 +00003593
3594 def test_get_all_with_8bit_headers(self):
3595 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003596 self.assertListEqual([str(x) for x in msg.get_all('from')],
R. David Murray96fd54e2010-10-08 15:55:28 +00003597 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003598 'g\uFFFD\uFFFDst'])
R. David Murray96fd54e2010-10-08 15:55:28 +00003599
R David Murraya2150232011-03-16 21:11:23 -04003600 def test_get_content_type_with_8bit(self):
3601 msg = email.message_from_bytes(textwrap.dedent("""\
3602 Content-Type: text/pl\xA7in; charset=utf-8
3603 """).encode('latin-1'))
3604 self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin")
3605 self.assertEqual(msg.get_content_maintype(), "text")
3606 self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin")
3607
R David Murray97f43c02012-06-24 05:03:27 -04003608 # test_headerregistry.TestContentTypeHeader.non_ascii_in_params
R David Murraya2150232011-03-16 21:11:23 -04003609 def test_get_params_with_8bit(self):
3610 msg = email.message_from_bytes(
3611 'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1'))
3612 self.assertEqual(msg.get_params(header='x-header'),
3613 [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])
3614 self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne')
3615 # XXX: someday you might be able to get 'b\xa7r', for now you can't.
3616 self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None)
3617
R David Murray97f43c02012-06-24 05:03:27 -04003618 # test_headerregistry.TestContentTypeHeader.non_ascii_in_rfc2231_value
R David Murraya2150232011-03-16 21:11:23 -04003619 def test_get_rfc2231_params_with_8bit(self):
3620 msg = email.message_from_bytes(textwrap.dedent("""\
3621 Content-Type: text/plain; charset=us-ascii;
3622 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3623 ).encode('latin-1'))
3624 self.assertEqual(msg.get_param('title'),
3625 ('us-ascii', 'en', 'This is not f\uFFFDn'))
3626
3627 def test_set_rfc2231_params_with_8bit(self):
3628 msg = email.message_from_bytes(textwrap.dedent("""\
3629 Content-Type: text/plain; charset=us-ascii;
3630 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3631 ).encode('latin-1'))
3632 msg.set_param('title', 'test')
3633 self.assertEqual(msg.get_param('title'), 'test')
3634
3635 def test_del_rfc2231_params_with_8bit(self):
3636 msg = email.message_from_bytes(textwrap.dedent("""\
3637 Content-Type: text/plain; charset=us-ascii;
3638 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3639 ).encode('latin-1'))
3640 msg.del_param('title')
3641 self.assertEqual(msg.get_param('title'), None)
3642 self.assertEqual(msg.get_content_maintype(), 'text')
3643
3644 def test_get_payload_with_8bit_cte_header(self):
3645 msg = email.message_from_bytes(textwrap.dedent("""\
3646 Content-Transfer-Encoding: b\xa7se64
3647 Content-Type: text/plain; charset=latin-1
3648
3649 payload
3650 """).encode('latin-1'))
3651 self.assertEqual(msg.get_payload(), 'payload\n')
3652 self.assertEqual(msg.get_payload(decode=True), b'payload\n')
3653
# A utf-8 encoded message with non-ASCII text in the To and Subject
# headers and a Cyrillic body declared as 8bit.
non_latin_bin_msg = (
    'From: foo@bar.com\n'
    'To: báz\n'
    'Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
    '\tJean de Baddie\n'
    'Mime-Version: 1.0\n'
    'Content-Type: text/plain; charset="utf-8"\n'
    'Content-Transfer-Encoding: 8bit\n'
    '\n'
    'Да, они летят.\n'
).encode('utf-8')
3665
3666 def test_bytes_generator(self):
3667 msg = email.message_from_bytes(self.non_latin_bin_msg)
3668 out = BytesIO()
3669 email.generator.BytesGenerator(out).flatten(msg)
3670 self.assertEqual(out.getvalue(), self.non_latin_bin_msg)
3671
R. David Murray7372a072011-01-26 21:21:32 +00003672 def test_bytes_generator_handles_None_body(self):
3673 #Issue 11019
3674 msg = email.message.Message()
3675 out = BytesIO()
3676 email.generator.BytesGenerator(out).flatten(msg)
3677 self.assertEqual(out.getvalue(), b"\n")
3678
# Expected text when non_latin_bin_msg is flattened to a str: the 8bit
# headers become unknown-8bit encoded words and the body is base64.  The
# long Subject is folded, and every continuation line of a folded header
# starts with one space, so each line is written out explicitly rather
# than relying on relative indentation inside a dedent()ed literal.
non_latin_bin_msg_as7bit_wrapped = (
    'From: foo@bar.com\n'
    'To: =?unknown-8bit?q?b=C3=A1z?=\n'
    'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
    'coll=C3=A8gue?=\n'
    ' =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
    ' =?unknown-8bit?q?_Jean_de_Baddie?=\n'
    'Mime-Version: 1.0\n'
    'Content-Type: text/plain; charset="utf-8"\n'
    'Content-Transfer-Encoding: base64\n'
    '\n'
    '0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==\n'
)
3691
3692 def test_generator_handles_8bit(self):
3693 msg = email.message_from_bytes(self.non_latin_bin_msg)
3694 out = StringIO()
3695 email.generator.Generator(out).flatten(msg)
R. David Murray92532142011-01-07 23:25:30 +00003696 self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped)
R. David Murray96fd54e2010-10-08 15:55:28 +00003697
3698 def test_bytes_generator_with_unix_from(self):
3699 # The unixfrom contains a current date, so we can't check it
3700 # literally. Just make sure the first word is 'From' and the
3701 # rest of the message matches the input.
3702 msg = email.message_from_bytes(self.non_latin_bin_msg)
3703 out = BytesIO()
3704 email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
3705 lines = out.getvalue().split(b'\n')
3706 self.assertEqual(lines[0].split()[0], b'From')
3707 self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)
3708
# Same text as non_latin_bin_msg_as7bit_wrapped, except that the Subject
# line and its first continuation line (list items 2 and 3) are replaced
# by a single long Subject line.
non_latin_bin_msg_as7bit = '\n'.join(
    non_latin_bin_msg_as7bit_wrapped.split('\n')[:2]
    + ['Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
       'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
    + non_latin_bin_msg_as7bit_wrapped.split('\n')[4:]
)
3714
def test_message_from_binary_file(self):
    # Write the binary message to a scratch file, parse it back with
    # BytesParser and compare its str() rendering.
    path = 'test.msg'
    # Register the removal first so the file goes away even on failure.
    self.addCleanup(unlink, path)
    with open(path, 'wb') as sink:
        sink.write(self.non_latin_bin_msg)
    with open(path, 'rb') as source:
        parsed = email.parser.BytesParser().parse(source)
    self.assertEqual(str(parsed), self.non_latin_bin_msg_as7bit)
3723
# A latin-1 encoded message whose body is declared as 8bit.
latin_bin_msg = (
    'From: foo@bar.com\n'
    'To: Dinsdale\n'
    'Subject: Nudge nudge, wink, wink\n'
    'Mime-Version: 1.0\n'
    'Content-Type: text/plain; charset="latin-1"\n'
    'Content-Transfer-Encoding: 8bit\n'
    '\n'
    'oh là là, know what I mean, know what I mean?\n'
).encode('latin-1')

# The text the tests expect when that message is rendered as a str: the
# charset is reported as iso-8859-1 and the body is quoted-printable.
latin_bin_msg_as7bit = (
    'From: foo@bar.com\n'
    'To: Dinsdale\n'
    'Subject: Nudge nudge, wink, wink\n'
    'Mime-Version: 1.0\n'
    'Content-Type: text/plain; charset="iso-8859-1"\n'
    'Content-Transfer-Encoding: quoted-printable\n'
    '\n'
    'oh l=E0 l=E0, know what I mean, know what I mean?\n'
)
3745
3746 def test_string_generator_reencodes_to_quopri_when_appropriate(self):
3747 m = email.message_from_bytes(self.latin_bin_msg)
3748 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3749
3750 def test_decoded_generator_emits_unicode_body(self):
3751 m = email.message_from_bytes(self.latin_bin_msg)
3752 out = StringIO()
3753 email.generator.DecodedGenerator(out).flatten(m)
3754 #DecodedHeader output contains an extra blank line compared
3755 #to the input message. RDM: not sure if this is a bug or not,
3756 #but it is not specific to the 8bit->7bit conversion.
3757 self.assertEqual(out.getvalue(),
3758 self.latin_bin_msg.decode('latin-1')+'\n')
3759
3760 def test_bytes_feedparser(self):
3761 bfp = email.feedparser.BytesFeedParser()
3762 for i in range(0, len(self.latin_bin_msg), 10):
3763 bfp.feed(self.latin_bin_msg[i:i+10])
3764 m = bfp.close()
3765 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3766
def test_crlf_flatten(self):
    # Read the sample in binary mode, parse it, and flatten it again
    # with CRLF line endings: the bytes must come back unchanged.
    with openfile('msg_26.txt', 'rb') as fp:
        original = fp.read()
    parsed = email.message_from_bytes(original)
    sink = BytesIO()
    email.generator.BytesGenerator(sink).flatten(parsed, linesep='\r\n')
    self.assertEqual(sink.getvalue(), original)
R David Murrayc5c14722011-04-06 08:13:02 -04003775
3776 def test_8bit_multipart(self):
3777 # Issue 11605
3778 source = textwrap.dedent("""\
3779 Date: Fri, 18 Mar 2011 17:15:43 +0100
3780 To: foo@example.com
3781 From: foodwatch-Newsletter <bar@example.com>
3782 Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System
3783 Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain>
3784 MIME-Version: 1.0
3785 Content-Type: multipart/alternative;
3786 boundary="b1_76a486bee62b0d200f33dc2ca08220ad"
3787
3788 --b1_76a486bee62b0d200f33dc2ca08220ad
3789 Content-Type: text/plain; charset="utf-8"
3790 Content-Transfer-Encoding: 8bit
3791
3792 Guten Tag, ,
3793
3794 mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die
3795 Nachrichten aus Japan.
3796
3797
3798 --b1_76a486bee62b0d200f33dc2ca08220ad
3799 Content-Type: text/html; charset="utf-8"
3800 Content-Transfer-Encoding: 8bit
3801
3802 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
3803 "http://www.w3.org/TR/html4/loose.dtd">
3804 <html lang="de">
3805 <head>
3806 <title>foodwatch - Newsletter</title>
3807 </head>
3808 <body>
3809 <p>mit gro&szlig;er Betroffenheit verfolgen auch wir im foodwatch-Team
3810 die Nachrichten aus Japan.</p>
3811 </body>
3812 </html>
3813 --b1_76a486bee62b0d200f33dc2ca08220ad--
3814
3815 """).encode('utf-8')
3816 msg = email.message_from_bytes(source)
3817 s = BytesIO()
3818 g = email.generator.BytesGenerator(s)
3819 g.flatten(msg)
3820 self.assertEqual(s.getvalue(), source)
3821
R David Murray9fd170e2012-03-14 14:05:03 -04003822 def test_bytes_generator_b_encoding_linesep(self):
3823 # Issue 14062: b encoding was tacking on an extra \n.
3824 m = Message()
3825 # This has enough non-ascii that it should always end up b encoded.
3826 m['Subject'] = Header('žluťoučký kůň')
3827 s = BytesIO()
3828 g = email.generator.BytesGenerator(s)
3829 g.flatten(m, linesep='\r\n')
3830 self.assertEqual(
3831 s.getvalue(),
3832 b'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
3833
3834 def test_generator_b_encoding_linesep(self):
3835 # Since this broke in ByteGenerator, test Generator for completeness.
3836 m = Message()
3837 # This has enough non-ascii that it should always end up b encoded.
3838 m['Subject'] = Header('žluťoučký kůň')
3839 s = StringIO()
3840 g = email.generator.Generator(s)
3841 g.flatten(m, linesep='\r\n')
3842 self.assertEqual(
3843 s.getvalue(),
3844 'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
3845
R. David Murray8451c4b2010-10-23 22:19:56 +00003846 maxDiff = None
3847
Ezio Melottib3aedd42010-11-20 19:04:17 +00003848
class BaseTestBytesGeneratorIdempotent:
    # Mixin providing _msgobj() and _idempotent() in terms of bytes.
    # Concrete subclasses supply three attributes read below:
    #   linesep                  - str separator handed to flatten()
    #   blinesep                 - bytes replacement for each matched line end
    #   normalize_linesep_regex  - compiled bytes pattern matching line ends
    #                              that need rewriting

    maxDiff = None

    def _msgobj(self, filename):
        # Read the fixture as bytes, rewrite its line endings, and return
        # the pair (parsed message, rewritten bytes).
        with openfile(filename, 'rb') as fp:
            raw = fp.read()
        normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(normalized), normalized

    def _idempotent(self, msg, data, unixfrom=False):
        # Flatten msg without header re-wrapping (maxheaderlen=0) and
        # require the bytes to match data exactly.
        sink = BytesIO()
        generator = email.generator.BytesGenerator(sink, maxheaderlen=0)
        generator.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertEqual(data, sink.getvalue())
R. David Murray96fd54e2010-10-08 15:55:28 +00003865
3866
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                     TestIdempotent):
    # Bytes round-trip variant using bare LF: every CRLF found in a
    # fixture is rewritten to LF before parsing.
    normalize_linesep_regex = re.compile(br'\r\n')
    blinesep = b'\n'
    linesep = '\n'
3872
3873
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    # Bytes round-trip variant using CRLF: every LF not already preceded
    # by CR is rewritten to CRLF before parsing.
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
    blinesep = b'\r\n'
    linesep = '\r\n'
3879
Ezio Melottib3aedd42010-11-20 19:04:17 +00003880
class TestBase64(unittest.TestCase):
    # Tests for the helpers in email.base64mime.

    def test_len(self):
        eq = self.assertEqual
        eq(base64mime.header_length('hello'),
           len(base64mime.body_encode(b'hello', eol='')))
        # Expected encoded length for inputs of 0..14 characters: every
        # started group of three input characters costs four output ones.
        expected_sizes = [0,
                          4, 4, 4,
                          8, 8, 8,
                          12, 12, 12,
                          16, 16, 16,
                          20, 20]
        for size, bsize in enumerate(expected_sizes):
            eq(base64mime.header_length('x' * size), bsize)

    def test_decode(self):
        eq = self.assertEqual
        eq(base64mime.decode(''), b'')
        eq(base64mime.decode('aGVsbG8='), b'hello')

    def test_encode(self):
        eq = self.assertEqual
        # Empty input gives an empty result.  Every non-empty result below
        # is a str, so only emptiness is pinned here rather than the type
        # of the returned object.
        eq(len(base64mime.body_encode(b'')), 0)
        eq(base64mime.body_encode(b'hello'), 'aGVsbG8=\n')
        # A newline in the input is encoded like any other byte
        eq(base64mime.body_encode(b'hello\n'), 'aGVsbG8K\n')
        # 100 input bytes wrapped at 40 output characters: three full
        # lines plus a padded remainder.
        full_line = 'eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg'
        wrapped = [full_line, full_line, full_line, 'eHh4eCB4eHh4IA==']
        # Test the maxlinelen arg
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40),
           ''.join(line + '\n' for line in wrapped))
        # Test the eol argument
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           ''.join(line + '\r\n' for line in wrapped))

    def test_header_encode(self):
        eq = self.assertEqual
        he = base64mime.header_encode
        eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        eq(he('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # Test the charset option
        eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=')
        # This used to repeat the default-charset assertion above; pass the
        # charset so the option is exercised on multi-line input as well.
        eq(he('hello\nworld', charset='iso-8859-2'),
           '=?iso-8859-2?b?aGVsbG8Kd29ybGQ=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003931
3932
Ezio Melottib3aedd42010-11-20 19:04:17 +00003933
class TestQuopri(unittest.TestCase):
    # Tests for the helpers in email.quoprimime: the per-octet checks, the
    # length calculators, and header/body encoding and decoding.

    def setUp(self):
        # Set of characters (as byte integers) that don't need to be encoded
        # in headers.
        self.hlit = list(chain(
            range(ord('a'), ord('z') + 1),
            range(ord('A'), ord('Z') + 1),
            range(ord('0'), ord('9') + 1),
            b'!*+-/'))
        # Set of characters (as byte integers) that do need to be encoded in
        # headers.
        self.hnon = [c for c in range(256) if c not in self.hlit]
        # Use a TestCase assertion: a bare assert vanishes under -O.
        self.assertEqual(len(self.hlit) + len(self.hnon), 256)
        # Set of characters (as byte integers) that don't need to be encoded
        # in bodies.
        self.blit = list(range(ord(' '), ord('~') + 1))
        self.blit.append(ord('\t'))
        self.blit.remove(ord('='))
        # Set of characters (as byte integers) that do need to be encoded in
        # bodies.
        self.bnon = [c for c in range(256) if c not in self.blit]
        self.assertEqual(len(self.blit) + len(self.bnon), 256)

    def test_quopri_header_check(self):
        for c in self.hlit:
            self.assertFalse(quoprimime.header_check(c),
                        'Should not be header quopri encoded: %s' % chr(c))
        for c in self.hnon:
            self.assertTrue(quoprimime.header_check(c),
                            'Should be header quopri encoded: %s' % chr(c))

    def test_quopri_body_check(self):
        for c in self.blit:
            self.assertFalse(quoprimime.body_check(c),
                        'Should not be body quopri encoded: %s' % chr(c))
        for c in self.bnon:
            self.assertTrue(quoprimime.body_check(c),
                            'Should be body quopri encoded: %s' % chr(c))

    def test_header_quopri_len(self):
        eq = self.assertEqual
        eq(quoprimime.header_length(b'hello'), 5)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
           quoprimime.header_length(b'hello') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
           quoprimime.header_length(b'h@e@l@l@o@') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        for c in self.hlit:
            eq(quoprimime.header_length(bytes([c])), 1,
               'expected length 1 for %r' % chr(c))
        for c in self.hnon:
            # Space is special; it's encoded to _
            if c == ord(' '):
                continue
            eq(quoprimime.header_length(bytes([c])), 3,
               'expected length 3 for %r' % chr(c))
        eq(quoprimime.header_length(b' '), 1)

    def test_body_quopri_len(self):
        eq = self.assertEqual
        for c in self.blit:
            eq(quoprimime.body_length(bytes([c])), 1)
        for c in self.bnon:
            eq(quoprimime.body_length(bytes([c])), 3)

    def test_quote_unquote_idempotent(self):
        for x in range(256):
            c = chr(x)
            self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)

    def _test_header_encode(self, header, expected_encoded_header, charset=None):
        # Helper: encode header (omitting charset when None so the
        # library default applies) and compare with the expected text.
        if charset is None:
            encoded_header = quoprimime.header_encode(header)
        else:
            encoded_header = quoprimime.header_encode(header, charset)
        self.assertEqual(encoded_header, expected_encoded_header)

    def test_header_encode_null(self):
        self._test_header_encode(b'', '')

    def test_header_encode_one_word(self):
        self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=')

    def test_header_encode_two_lines(self):
        self._test_header_encode(b'hello\nworld',
                                '=?iso-8859-1?q?hello=0Aworld?=')

    def test_header_encode_non_ascii(self):
        self._test_header_encode(b'hello\xc7there',
                                '=?iso-8859-1?q?hello=C7there?=')

    def test_header_encode_alt_charset(self):
        self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
                charset='iso-8859-2')

    def _test_header_decode(self, encoded_header, expected_decoded_header):
        # Helper: decode a header payload and compare with the expected text.
        decoded_header = quoprimime.header_decode(encoded_header)
        self.assertEqual(decoded_header, expected_decoded_header)

    def test_header_decode_null(self):
        self._test_header_decode('', '')

    def test_header_decode_one_word(self):
        self._test_header_decode('hello', 'hello')

    def test_header_decode_two_lines(self):
        self._test_header_decode('hello=0Aworld', 'hello\nworld')

    def test_header_decode_non_ascii(self):
        self._test_header_decode('hello=C7there', 'hello\xc7there')

    def test_header_decode_re_bug_18380(self):
        # Issue 18380: Call re.sub with a positional argument for flags in the wrong position
        self.assertEqual(quoprimime.header_decode('=30' * 257), '0' * 257)

    def _test_decode(self, encoded, expected_decoded, eol=None):
        # Helper: decode a body (omitting eol when None so the library
        # default applies) and compare with the expected text.
        if eol is None:
            decoded = quoprimime.decode(encoded)
        else:
            decoded = quoprimime.decode(encoded, eol=eol)
        self.assertEqual(decoded, expected_decoded)

    def test_decode_null_word(self):
        self._test_decode('', '')

    def test_decode_null_line_null_word(self):
        self._test_decode('\r\n', '\n')

    def test_decode_one_word(self):
        self._test_decode('hello', 'hello')

    def test_decode_one_word_eol(self):
        self._test_decode('hello', 'hello', eol='X')

    def test_decode_one_line(self):
        self._test_decode('hello\r\n', 'hello\n')

    def test_decode_one_line_lf(self):
        self._test_decode('hello\n', 'hello\n')

    def test_decode_one_line_cr(self):
        self._test_decode('hello\r', 'hello\n')

    def test_decode_one_line_nl(self):
        self._test_decode('hello\n', 'helloX', eol='X')

    def test_decode_one_line_crnl(self):
        self._test_decode('hello\r\n', 'helloX', eol='X')

    def test_decode_one_line_one_word(self):
        self._test_decode('hello\r\nworld', 'hello\nworld')

    def test_decode_one_line_one_word_eol(self):
        self._test_decode('hello\r\nworld', 'helloXworld', eol='X')

    def test_decode_two_lines(self):
        self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n')

    def test_decode_two_lines_eol(self):
        self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X')

    def test_decode_one_long_line(self):
        self._test_decode('Spam' * 250, 'Spam' * 250)

    def test_decode_one_space(self):
        self._test_decode(' ', '')

    def test_decode_multiple_spaces(self):
        self._test_decode(' ' * 5, '')

    def test_decode_one_line_trailing_spaces(self):
        self._test_decode('hello \r\n', 'hello\n')

    def test_decode_two_lines_trailing_spaces(self):
        self._test_decode('hello \r\nworld \r\n', 'hello\nworld\n')

    def test_decode_quoted_word(self):
        self._test_decode('=22quoted=20words=22', '"quoted words"')

    def test_decode_uppercase_quoting(self):
        self._test_decode('ab=CD=EF', 'ab\xcd\xef')

    def test_decode_lowercase_quoting(self):
        self._test_decode('ab=cd=ef', 'ab\xcd\xef')

    def test_decode_soft_line_break(self):
        self._test_decode('soft line=\r\nbreak', 'soft linebreak')

    def test_decode_false_quoting(self):
        self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2')

    def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
        # Helper: encode body, forwarding only the options the caller set
        # so that body_encode's own defaults are exercised otherwise, then
        # check both the text and (when splittable) the line lengths.
        kwargs = {}
        if maxlinelen is None:
            # Use body_encode's default.
            maxlinelen = 76
        else:
            kwargs['maxlinelen'] = maxlinelen
        if eol is None:
            # Use body_encode's default.
            eol = '\n'
        else:
            kwargs['eol'] = eol
        encoded_body = quoprimime.body_encode(body, **kwargs)
        self.assertEqual(encoded_body, expected_encoded_body)
        if eol in ('\n', '\r\n'):
            # We know how to split the result back into lines, so maxlinelen
            # can be checked.
            for line in encoded_body.splitlines():
                self.assertLessEqual(len(line), maxlinelen)

    def test_encode_null(self):
        self._test_encode('', '')

    def test_encode_null_lines(self):
        self._test_encode('\n\n', '\n\n')

    def test_encode_one_line(self):
        self._test_encode('hello\n', 'hello\n')

    def test_encode_one_line_crlf(self):
        self._test_encode('hello\r\n', 'hello\n')

    def test_encode_one_line_eol(self):
        self._test_encode('hello\n', 'hello\r\n', eol='\r\n')

    def test_encode_one_space(self):
        self._test_encode(' ', '=20')

    def test_encode_one_line_one_space(self):
        self._test_encode(' \n', '=20\n')

    # XXX: body_encode() expects strings, but uses ord(char) from these
    # strings to index into a 256-entry list.  For code points above 255,
    # this will fail.  Should there be a check for 8-bit only ord() values
    # in body, or at least a comment about the expected input?

    def test_encode_two_lines_one_space(self):
        self._test_encode(' \n \n', '=20\n=20\n')

    def test_encode_one_word_trailing_spaces(self):
        # Two trailing spaces: only the last one is quoted, the one before
        # it stays literal.
        self._test_encode('hello  ', 'hello =20')

    def test_encode_one_line_trailing_spaces(self):
        # Same as above, with a line ending after the spaces.
        self._test_encode('hello  \n', 'hello =20\n')

    def test_encode_one_word_trailing_tab(self):
        self._test_encode('hello \t', 'hello =09')

    def test_encode_one_line_trailing_tab(self):
        self._test_encode('hello \t\n', 'hello =09\n')

    def test_encode_trailing_space_before_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6)

    def test_encode_trailing_space_at_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5)

    def test_encode_trailing_space_beyond_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4)

    def test_encode_whitespace_lines(self):
        self._test_encode(' \n' * 5, '=20\n' * 5)

    def test_encode_quoted_equals(self):
        self._test_encode('a = b', 'a =3D b')

    def test_encode_one_long_string(self):
        self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25)

    def test_encode_one_long_line(self):
        self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')

    def test_encode_one_very_long_line(self):
        self._test_encode('x' * 200 + '\n',
                2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n')

    def test_encode_shortest_maxlinelen(self):
        self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4)

    def test_encode_maxlinelen_too_small(self):
        self.assertRaises(ValueError, self._test_encode, '', '', maxlinelen=3)

    def test_encode(self):
        eq = self.assertEqual
        eq(quoprimime.body_encode(''), '')
        eq(quoprimime.body_encode('hello'), 'hello')
        # A CRLF inside the input comes out as the default eol
        eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
        # Test the maxlinelen arg
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40),
           'xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\n'
           ' xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\n'
           'x xxxx xxxx xxxx xxxx=20')
        # Test the eol argument
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           'xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r\n'
           ' xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r\n'
           'x xxxx xxxx xxxx xxxx=20')
        # Blank lines inside the body are preserved
        eq(quoprimime.body_encode('one line\n\ntwo line'),
           'one line\n\ntwo line')
4247
4248
Ezio Melottib3aedd42010-11-20 19:04:17 +00004249
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004250# Test the Charset class
class TestCharset(unittest.TestCase):
    def tearDown(self):
        # test_body_encode registers a throwaway 'fake' charset; make sure
        # it never outlives the test, whether or not it was added.
        from email import charset as CharsetModule
        CharsetModule.CHARSETS.pop('fake', None)

    def test_codec_encodeable(self):
        eq = self.assertEqual
        # Make sure us-ascii = no Unicode conversion
        ascii_charset = Charset('us-ascii')
        eq(ascii_charset.header_encode('Hello World!'), 'Hello World!')
        # Test 8-bit idempotency with us-ascii
        s = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
        self.assertRaises(UnicodeError, ascii_charset.header_encode, s)
        utf8_charset = Charset('utf-8')
        eq(utf8_charset.header_encode(s),
           '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')

    def test_body_encode(self):
        eq = self.assertEqual
        # Try a charset with QP body encoding
        c = Charset('iso-8859-1')
        eq('hello w=F6rld', c.body_encode('hello w\xf6rld'))
        # Try a charset with Base64 body encoding
        c = Charset('utf-8')
        eq('aGVsbG8gd29ybGQ=\n', c.body_encode(b'hello world'))
        # Try a charset with None body encoding
        c = Charset('us-ascii')
        eq('hello world', c.body_encode('hello world'))
        # Try the convert argument, where input codec != output codec
        c = Charset('euc-jp')
        # With apologies to Tokio Kikuchi ;)
        # XXX FIXME
##         try:
##             eq('\x1b$B5FCO;~IW\x1b(B',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
##             eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
##         except LookupError:
##             # We probably don't have the Japanese codecs installed
##             pass
        # Testing SF bug #625509, which we have to fake, since there are no
        # built-in encodings where the header encoding is QP but the body
        # encoding is not.
        from email import charset as CharsetModule
        CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8')
        c = Charset('fake')
        eq('hello world', c.body_encode('hello world'))

    def test_unicode_charset_name(self):
        # A plain name round-trips through str(); a name containing a
        # non-ASCII character is rejected with CharsetError.
        name = 'us-ascii'
        self.assertEqual(str(Charset(name)), name)
        self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
4305
4306
Ezio Melottib3aedd42010-11-20 19:04:17 +00004307
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004308# Test multilingual MIME headers.
4309class TestHeader(TestEmailBase):
4310 def test_simple(self):
4311 eq = self.ndiffAssertEqual
4312 h = Header('Hello World!')
4313 eq(h.encode(), 'Hello World!')
4314 h.append(' Goodbye World!')
4315 eq(h.encode(), 'Hello World! Goodbye World!')
4316
4317 def test_simple_surprise(self):
4318 eq = self.ndiffAssertEqual
4319 h = Header('Hello World!')
4320 eq(h.encode(), 'Hello World!')
4321 h.append('Goodbye World!')
4322 eq(h.encode(), 'Hello World! Goodbye World!')
4323
4324 def test_header_needs_no_decoding(self):
4325 h = 'no decoding needed'
4326 self.assertEqual(decode_header(h), [(h, None)])
4327
4328 def test_long(self):
4329 h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
4330 maxlinelen=76)
4331 for l in h.encode(splitchars=' ').split('\n '):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00004332 self.assertTrue(len(l) <= 76)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004333
4334 def test_multilingual(self):
4335 eq = self.ndiffAssertEqual
4336 g = Charset("iso-8859-1")
4337 cz = Charset("iso-8859-2")
4338 utf8 = Charset("utf-8")
4339 g_head = (b'Die Mieter treten hier ein werden mit einem '
4340 b'Foerderband komfortabel den Korridor entlang, '
4341 b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
4342 b'gegen die rotierenden Klingen bef\xf6rdert. ')
4343 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
4344 b'd\xf9vtipu.. ')
4345 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
4346 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
4347 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
4348 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
4349 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
4350 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
4351 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
4352 '\u3044\u307e\u3059\u3002')
4353 h = Header(g_head, g)
4354 h.append(cz_head, cz)
4355 h.append(utf8_head, utf8)
Guido van Rossum9604e662007-08-30 03:46:43 +00004356 enc = h.encode(maxlinelen=76)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004357 eq(enc, """\
Guido van Rossum9604e662007-08-30 03:46:43 +00004358=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
4359 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
4360 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
4361 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004362 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
4363 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
4364 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
4365 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
Guido van Rossum9604e662007-08-30 03:46:43 +00004366 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
4367 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
4368 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
4369 decoded = decode_header(enc)
4370 eq(len(decoded), 3)
4371 eq(decoded[0], (g_head, 'iso-8859-1'))
4372 eq(decoded[1], (cz_head, 'iso-8859-2'))
4373 eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004374 ustr = str(h)
Guido van Rossum9604e662007-08-30 03:46:43 +00004375 eq(ustr,
4376 (b'Die Mieter treten hier ein werden mit einem Foerderband '
4377 b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
4378 b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
4379 b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
4380 b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
4381 b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
4382 b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
4383 b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
4384 b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
4385 b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
4386 b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
4387 b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
4388 b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
4389 b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
4390 b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
4391 b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
4392 ).decode('utf-8'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004393 # Test make_header()
4394 newh = make_header(decode_header(enc))
Guido van Rossum9604e662007-08-30 03:46:43 +00004395 eq(newh, h)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004396
4397 def test_empty_header_encode(self):
4398 h = Header()
4399 self.assertEqual(h.encode(), '')
Barry Warsaw8b3d6592007-08-30 02:10:49 +00004400
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004401 def test_header_ctor_default_args(self):
4402 eq = self.ndiffAssertEqual
4403 h = Header()
4404 eq(h, '')
4405 h.append('foo', Charset('iso-8859-1'))
Guido van Rossum9604e662007-08-30 03:46:43 +00004406 eq(h, 'foo')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004407
4408 def test_explicit_maxlinelen(self):
4409 eq = self.ndiffAssertEqual
4410 hstr = ('A very long line that must get split to something other '
4411 'than at the 76th character boundary to test the non-default '
4412 'behavior')
4413 h = Header(hstr)
4414 eq(h.encode(), '''\
4415A very long line that must get split to something other than at the 76th
4416 character boundary to test the non-default behavior''')
4417 eq(str(h), hstr)
4418 h = Header(hstr, header_name='Subject')
4419 eq(h.encode(), '''\
4420A very long line that must get split to something other than at the
4421 76th character boundary to test the non-default behavior''')
4422 eq(str(h), hstr)
4423 h = Header(hstr, maxlinelen=1024, header_name='Subject')
4424 eq(h.encode(), hstr)
4425 eq(str(h), hstr)
4426
Guido van Rossum9604e662007-08-30 03:46:43 +00004427 def test_quopri_splittable(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004428 eq = self.ndiffAssertEqual
4429 h = Header(charset='iso-8859-1', maxlinelen=20)
Guido van Rossum9604e662007-08-30 03:46:43 +00004430 x = 'xxxx ' * 20
4431 h.append(x)
4432 s = h.encode()
4433 eq(s, """\
4434=?iso-8859-1?q?xxx?=
4435 =?iso-8859-1?q?x_?=
4436 =?iso-8859-1?q?xx?=
4437 =?iso-8859-1?q?xx?=
4438 =?iso-8859-1?q?_x?=
4439 =?iso-8859-1?q?xx?=
4440 =?iso-8859-1?q?x_?=
4441 =?iso-8859-1?q?xx?=
4442 =?iso-8859-1?q?xx?=
4443 =?iso-8859-1?q?_x?=
4444 =?iso-8859-1?q?xx?=
4445 =?iso-8859-1?q?x_?=
4446 =?iso-8859-1?q?xx?=
4447 =?iso-8859-1?q?xx?=
4448 =?iso-8859-1?q?_x?=
4449 =?iso-8859-1?q?xx?=
4450 =?iso-8859-1?q?x_?=
4451 =?iso-8859-1?q?xx?=
4452 =?iso-8859-1?q?xx?=
4453 =?iso-8859-1?q?_x?=
4454 =?iso-8859-1?q?xx?=
4455 =?iso-8859-1?q?x_?=
4456 =?iso-8859-1?q?xx?=
4457 =?iso-8859-1?q?xx?=
4458 =?iso-8859-1?q?_x?=
4459 =?iso-8859-1?q?xx?=
4460 =?iso-8859-1?q?x_?=
4461 =?iso-8859-1?q?xx?=
4462 =?iso-8859-1?q?xx?=
4463 =?iso-8859-1?q?_x?=
4464 =?iso-8859-1?q?xx?=
4465 =?iso-8859-1?q?x_?=
4466 =?iso-8859-1?q?xx?=
4467 =?iso-8859-1?q?xx?=
4468 =?iso-8859-1?q?_x?=
4469 =?iso-8859-1?q?xx?=
4470 =?iso-8859-1?q?x_?=
4471 =?iso-8859-1?q?xx?=
4472 =?iso-8859-1?q?xx?=
4473 =?iso-8859-1?q?_x?=
4474 =?iso-8859-1?q?xx?=
4475 =?iso-8859-1?q?x_?=
4476 =?iso-8859-1?q?xx?=
4477 =?iso-8859-1?q?xx?=
4478 =?iso-8859-1?q?_x?=
4479 =?iso-8859-1?q?xx?=
4480 =?iso-8859-1?q?x_?=
4481 =?iso-8859-1?q?xx?=
4482 =?iso-8859-1?q?xx?=
4483 =?iso-8859-1?q?_?=""")
4484 eq(x, str(make_header(decode_header(s))))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004485 h = Header(charset='iso-8859-1', maxlinelen=40)
4486 h.append('xxxx ' * 20)
Guido van Rossum9604e662007-08-30 03:46:43 +00004487 s = h.encode()
4488 eq(s, """\
4489=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
4490 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
4491 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
4492 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
4493 =?iso-8859-1?q?_xxxx_xxxx_?=""")
4494 eq(x, str(make_header(decode_header(s))))
4495
4496 def test_base64_splittable(self):
4497 eq = self.ndiffAssertEqual
4498 h = Header(charset='koi8-r', maxlinelen=20)
4499 x = 'xxxx ' * 20
4500 h.append(x)
4501 s = h.encode()
4502 eq(s, """\
4503=?koi8-r?b?eHh4?=
4504 =?koi8-r?b?eCB4?=
4505 =?koi8-r?b?eHh4?=
4506 =?koi8-r?b?IHh4?=
4507 =?koi8-r?b?eHgg?=
4508 =?koi8-r?b?eHh4?=
4509 =?koi8-r?b?eCB4?=
4510 =?koi8-r?b?eHh4?=
4511 =?koi8-r?b?IHh4?=
4512 =?koi8-r?b?eHgg?=
4513 =?koi8-r?b?eHh4?=
4514 =?koi8-r?b?eCB4?=
4515 =?koi8-r?b?eHh4?=
4516 =?koi8-r?b?IHh4?=
4517 =?koi8-r?b?eHgg?=
4518 =?koi8-r?b?eHh4?=
4519 =?koi8-r?b?eCB4?=
4520 =?koi8-r?b?eHh4?=
4521 =?koi8-r?b?IHh4?=
4522 =?koi8-r?b?eHgg?=
4523 =?koi8-r?b?eHh4?=
4524 =?koi8-r?b?eCB4?=
4525 =?koi8-r?b?eHh4?=
4526 =?koi8-r?b?IHh4?=
4527 =?koi8-r?b?eHgg?=
4528 =?koi8-r?b?eHh4?=
4529 =?koi8-r?b?eCB4?=
4530 =?koi8-r?b?eHh4?=
4531 =?koi8-r?b?IHh4?=
4532 =?koi8-r?b?eHgg?=
4533 =?koi8-r?b?eHh4?=
4534 =?koi8-r?b?eCB4?=
4535 =?koi8-r?b?eHh4?=
4536 =?koi8-r?b?IA==?=""")
4537 eq(x, str(make_header(decode_header(s))))
4538 h = Header(charset='koi8-r', maxlinelen=40)
4539 h.append(x)
4540 s = h.encode()
4541 eq(s, """\
4542=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
4543 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
4544 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
4545 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
4546 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
4547 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
4548 eq(x, str(make_header(decode_header(s))))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004549
4550 def test_us_ascii_header(self):
4551 eq = self.assertEqual
4552 s = 'hello'
4553 x = decode_header(s)
4554 eq(x, [('hello', None)])
4555 h = make_header(x)
4556 eq(s, h.encode())
4557
4558 def test_string_charset(self):
4559 eq = self.assertEqual
4560 h = Header()
4561 h.append('hello', 'iso-8859-1')
Guido van Rossum9604e662007-08-30 03:46:43 +00004562 eq(h, 'hello')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004563
4564## def test_unicode_error(self):
4565## raises = self.assertRaises
4566## raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
4567## raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
4568## h = Header()
4569## raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
4570## raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
4571## raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')
4572
4573 def test_utf8_shortest(self):
4574 eq = self.assertEqual
4575 h = Header('p\xf6stal', 'utf-8')
4576 eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
4577 h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
4578 eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')
4579
4580 def test_bad_8bit_header(self):
4581 raises = self.assertRaises
4582 eq = self.assertEqual
4583 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4584 raises(UnicodeError, Header, x)
4585 h = Header()
4586 raises(UnicodeError, h.append, x)
4587 e = x.decode('utf-8', 'replace')
4588 eq(str(Header(x, errors='replace')), e)
4589 h.append(x, errors='replace')
4590 eq(str(h), e)
4591
R David Murray041015c2011-03-25 15:10:55 -04004592 def test_escaped_8bit_header(self):
4593 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
R David Murray6bdb1762011-06-18 12:30:55 -04004594 e = x.decode('ascii', 'surrogateescape')
4595 h = Header(e, charset=email.charset.UNKNOWN8BIT)
R David Murray041015c2011-03-25 15:10:55 -04004596 self.assertEqual(str(h),
4597 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
4598 self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])
4599
R David Murraye5e366c2011-06-18 12:57:28 -04004600 def test_header_handles_binary_unknown8bit(self):
4601 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4602 h = Header(x, charset=email.charset.UNKNOWN8BIT)
4603 self.assertEqual(str(h),
4604 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
4605 self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])
4606
4607 def test_make_header_handles_binary_unknown8bit(self):
4608 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4609 h = Header(x, charset=email.charset.UNKNOWN8BIT)
4610 h2 = email.header.make_header(email.header.decode_header(h))
4611 self.assertEqual(str(h2),
4612 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
4613 self.assertEqual(email.header.decode_header(h2), [(x, 'unknown-8bit')])
4614
R David Murray041015c2011-03-25 15:10:55 -04004615 def test_modify_returned_list_does_not_change_header(self):
4616 h = Header('test')
4617 chunks = email.header.decode_header(h)
4618 chunks.append(('ascii', 'test2'))
4619 self.assertEqual(str(h), 'test')
4620
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004621 def test_encoded_adjacent_nonencoded(self):
4622 eq = self.assertEqual
4623 h = Header()
4624 h.append('hello', 'iso-8859-1')
4625 h.append('world')
4626 s = h.encode()
4627 eq(s, '=?iso-8859-1?q?hello?= world')
4628 h = make_header(decode_header(s))
4629 eq(h.encode(), s)
4630
R David Murray07ea53c2012-06-02 17:56:49 -04004631 def test_whitespace_keeper(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004632 eq = self.assertEqual
4633 s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
4634 parts = decode_header(s)
R David Murray07ea53c2012-06-02 17:56:49 -04004635 eq(parts, [(b'Subject: ', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b' zz.', None)])
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004636 hdr = make_header(parts)
4637 eq(hdr.encode(),
4638 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')
4639
4640 def test_broken_base64_header(self):
4641 raises = self.assertRaises
R. David Murrayc4e69cc2010-08-03 22:14:10 +00004642 s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004643 raises(errors.HeaderParseError, decode_header, s)
4644
R. David Murray477efb32011-01-05 01:39:32 +00004645 def test_shift_jis_charset(self):
4646 h = Header('文', charset='shift_jis')
4647 self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')
4648
R David Murrayde912762011-03-16 18:26:23 -04004649 def test_flatten_header_with_no_value(self):
4650 # Issue 11401 (regression from email 4.x) Note that the space after
4651 # the header doesn't reflect the input, but this is also the way
4652 # email 4.x behaved. At some point it would be nice to fix that.
4653 msg = email.message_from_string("EmptyHeader:")
4654 self.assertEqual(str(msg), "EmptyHeader: \n\n")
4655
R David Murray01581ee2011-04-18 10:04:34 -04004656 def test_encode_preserves_leading_ws_on_value(self):
4657 msg = Message()
4658 msg['SomeHeader'] = ' value with leading ws'
4659 self.assertEqual(str(msg), "SomeHeader: value with leading ws\n\n")
4660
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004661
Ezio Melottib3aedd42010-11-20 19:04:17 +00004662
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004663# Test RFC 2231 header parameters (en/de)coding
class TestRFC2231(TestEmailBase):
    # Each test parses (or builds) a message whose Content-Type or
    # Content-Disposition header carries RFC 2231 style parameters
    # (name*=, name*N=, name*N*=) and checks what the Message API returns.
    # Comments of the form "test_headerregistry...." name the corresponding
    # test in test_headerregistry.

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes
    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes
    def test_get_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_29.txt')
        eq(msg.get_param('title'),
           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        # With unquote=False the surrounding double quotes are kept.
        eq(msg.get_param('title', unquote=False),
           ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))

    def test_set_param(self):
        eq = self.ndiffAssertEqual
        msg = Message()
        # No language given: get_param() reports it as an empty string.
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii')
        eq(msg.get_param('title'),
           ('us-ascii', '', 'This is even more ***fun*** isn\'t it!'))
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        eq(msg.get_param('title'),
           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        msg = self._msgobj('msg_01.txt')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        # NOTE(review): the text below has to equal the flattened msg_01.txt
        # byte for byte; confirm the spacing in the Received date against
        # the data file.
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    def test_set_param_requote(self):
        msg = Message()
        msg.set_param('title', 'foo')
        self.assertEqual(msg['content-type'], 'text/plain; title="foo"')
        msg.set_param('title', 'bar', requote=False)
        self.assertEqual(msg['content-type'], 'text/plain; title=bar')
        # tspecial is still quoted.
        msg.set_param('title', "(bar)bell", requote=False)
        self.assertEqual(msg['content-type'], 'text/plain; title="(bar)bell"')

    def test_del_param(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        msg.set_param('foo', 'bar', charset='us-ascii', language='en')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        # Only 'foo' is removed; 'title' must survive in the output.
        msg.del_param('foo', header='Content-Type')
        # NOTE(review): the text below has to equal the flattened msg_01.txt
        # byte for byte; confirm the spacing in the Received date against
        # the data file.
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_charset
    # I changed the charset name, though, because the one in the file isn't
    # a legal charset name.  Should add a test for an illegal charset.
    def test_rfc2231_get_content_charset(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_32.txt')
        eq(msg.get_content_charset(), 'us-ascii')

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_double_quotes
    def test_rfc2231_parse_rfc_quoting(self):
        m = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*=''This%20is%20even%20more%20;
            \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        # Flattening must reproduce the input text exactly.
        self.assertEqual(m, msg.as_string())

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes
    def test_rfc2231_parse_extra_quoting(self):
        m = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*="''This%20is%20even%20more%20";
            \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        # Flattening must reproduce the input text exactly.
        self.assertEqual(m, msg.as_string())

    # test_headerregistry.TestContentTypeHeader.rfc2231_no_language_or_charset
    # but new test uses *0* because otherwise lang/charset is not valid.
    # test_headerregistry.TestContentTypeHeader.rfc2231_segmented_normal_values
    def test_rfc2231_no_language_or_charset(self):
        m = '''\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm

'''
        msg = email.message_from_string(m)
        param = msg.get_param('NAME')
        # Segments without charset/language must come back as a plain value,
        # not as a (charset, language, value) tuple.
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(
            param,
            'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_charset
    def test_rfc2231_no_language_or_charset_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    # Duplicate of previous test?
    def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    # test_headerregistry.TestContentTypeHeader.rfc2231_partly_encoded,
    # but the test below is wrong (the first part should be decoded).
    def test_rfc2231_partly_encoded(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20***fun*** is it not.pdf')

    def test_rfc2231_partly_nonencoded(self):
        # No segment is marked as encoded (no trailing '*'), so the
        # %-escapes are expected to stay as they are.
        m = '''\
Content-Disposition: inline;
\tfilename*0="This%20is%20even%20more%20";
\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_boundary(self):
        m = '''\
Content-Type: multipart/alternative;
\tboundary*0*="''This%20is%20even%20more%20";
\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tboundary*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_boundary(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_charset(self):
        # This is a nonsensical charset value, but tests the code anyway
        m = '''\
Content-Type: text/plain;
\tcharset*0*="This%20is%20even%20more%20";
\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tcharset*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        # The expected value is all lower case.
        self.assertEqual(msg.get_content_charset(),
                         'this is even more ***fun*** is it not.pdf')

    # test_headerregistry.TestContentTypeHeader.rfc2231_unknown_charset_treated_as_ascii
    def test_rfc2231_bad_encoding_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_charset(self):
        m = """\
Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D

"""
        msg = email.message_from_string(m)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertEqual(msg.get_content_charset(), None)

    def test_rfc2231_bad_character_in_charset(self):
        m = """\
Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D

"""
        msg = email.message_from_string(m)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertEqual(msg.get_content_charset(), None)

    def test_rfc2231_bad_character_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2*="is it not.pdf%E2"

'''
        msg = email.message_from_string(m)
        # The stray %E2 is expected to show up as U+FFFD.
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf\ufffd')

    def test_rfc2231_unknown_encoding(self):
        m = """\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt

"""
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(), 'myfile.txt')

    def test_rfc2231_single_tick_in_filename_extended(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, None)
        eq(language, None)
        eq(s, "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes
    def test_rfc2231_single_tick_in_filename(self):
        m = """\
Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(m)
        param = msg.get_param('name')
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(param, "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_value_with_charset_and_lang
    def test_rfc2231_tick_attack_extended(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_non_encoded_value
    def test_rfc2231_tick_attack(self):
        m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(m)
        param = msg.get_param('name')
        # Not an extended value, so the charset'lang' look-alike prefix must
        # stay part of the plain string.
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(param, "us-ascii'en-us'Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quotes_inside_quotes
    def test_rfc2231_no_extended_values(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo; name=\"Frank's Document\"

"""
        msg = email.message_from_string(m)
        eq(msg.get_param('name'), "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_then_unencoded_segments
    def test_rfc2231_encoded_then_unencoded_segments(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'My\";
\tname*1=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, 'My Document For You')

    # test_headerregistry.TestContentTypeHeader.rfc2231_unencoded_then_encoded_segments
    # test_headerregistry.TestContentTypeHeader.rfc2231_quoted_unencoded_then_encoded_segments
    def test_rfc2231_unencoded_then_encoded_segments(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'My\";
\tname*1*=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, 'My Document For You')
5031
5032
Ezio Melottib3aedd42010-11-20 19:04:17 +00005033
R. David Murraya8f480f2010-01-16 18:30:03 +00005034# Tests to ensure that signed parts of an email are completely preserved, as
5035# required by RFC1847 section 2.1. Note that these are incomplete, because the
5036# email package does not currently always preserve the body. See issue 1670765.
class TestSigned(TestEmailBase):

    # Finds the first span that starts with a '--<boundary>' line and ends at
    # the next '--<boundary>' prefix sitting at the end of a line.  Group 1 is
    # the boundary text, group 2 everything between the two delimiter lines.
    _first_part_re = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)

    def _msg_and_obj(self, filename):
        # Return (raw text of the data file, Message parsed from that text).
        with openfile(filename) as fp:
            original = fp.read()
        msg = email.message_from_string(original)
        return original, msg

    def _first_part(self, text):
        # Return the text of the first MIME part in *text*, failing the test
        # with a readable message when no delimiters are found.
        match = self._first_part_re.search(text)
        self.assertIsNotNone(match, 'no MIME part delimiters found')
        return match.group(2)

    def _signed_parts_eq(self, original, result):
        # Extract the first mime part of each message and require the one
        # from the regenerated text to equal the one from the original text.
        inpart = self._first_part(original)
        outpart = self._first_part(result)
        self.assertEqual(outpart, inpart)

    def test_long_headers_as_string(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        # Same check with an explicit header length limit of 60.
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        # Same check, flattening through a Generator into a StringIO.
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)
5069
5070
Ezio Melottib3aedd42010-11-20 19:04:17 +00005071
# When the file is executed directly, hand control to unittest's
# command-line runner.
if __name__ == '__main__':
    unittest.main()