blob: b62a889238bdeadc03108b97470920d4e6522aa1 [file] [log] [blame]
Benjamin Peterson46a99002010-01-09 18:45:30 +00001# Copyright (C) 2001-2010 Python Software Foundation
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002# Contact: email-sig@python.org
3# email package unit tests
4
R. David Murray719a4492010-11-21 16:53:48 +00005import re
Guido van Rossum8b3febe2007-08-30 01:15:14 +00006import time
7import base64
Guido van Rossum8b3febe2007-08-30 01:15:14 +00008import unittest
R. David Murray96fd54e2010-10-08 15:55:28 +00009import textwrap
Guido van Rossum8b3febe2007-08-30 01:15:14 +000010
R. David Murray96fd54e2010-10-08 15:55:28 +000011from io import StringIO, BytesIO
Guido van Rossum8b3febe2007-08-30 01:15:14 +000012from itertools import chain
13
14import email
R David Murrayc27e5222012-05-25 15:01:48 -040015import email.policy
Guido van Rossum8b3febe2007-08-30 01:15:14 +000016
17from email.charset import Charset
18from email.header import Header, decode_header, make_header
19from email.parser import Parser, HeaderParser
R David Murray638d40b2012-08-24 11:14:13 -040020from email.generator import Generator, DecodedGenerator, BytesGenerator
Guido van Rossum8b3febe2007-08-30 01:15:14 +000021from email.message import Message
22from email.mime.application import MIMEApplication
23from email.mime.audio import MIMEAudio
24from email.mime.text import MIMEText
25from email.mime.image import MIMEImage
26from email.mime.base import MIMEBase
27from email.mime.message import MIMEMessage
28from email.mime.multipart import MIMEMultipart
29from email import utils
30from email import errors
31from email import encoders
32from email import iterators
33from email import base64mime
34from email import quoprimime
35
R David Murray965794e2013-03-07 18:16:47 -050036from test.support import unlink
R David Murraya256bac2011-03-31 12:20:23 -040037from test.test_email import openfile, TestEmailBase
Guido van Rossum8b3febe2007-08-30 01:15:14 +000038
R David Murray612528d2013-03-15 20:38:15 -040039# These imports are documented to work, but we are testing them using a
40# different path, so we import them here just to make sure they are importable.
41from email.parser import FeedParser, BytesFeedParser
42
# Frequently used string constants.  NL re-joins split message lines in
# TestMessageAPI.test_as_string; EMPTYSTRING and SPACE are not referenced in
# the visible part of this file (presumably used further down -- confirm).
NL = '\n'
EMPTYSTRING = ''
SPACE = ' '
46
47
Guido van Rossum8b3febe2007-08-30 01:15:14 +000048# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    """Exercise the public API of email.message.Message.

    Fixture messages (``msg_NN.txt``) are read through ``self._msgobj`` and
    ``openfile``.  Individual tests are described with comments rather than
    docstrings so that unittest keeps reporting them by method name.
    """

    def test_get_all(self):
        # get_all() returns every value of a repeated header, and the
        # supplied default when the header is missing.
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        self.assertIsNone(msg.get_charset())
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        self.assertIsNone(msg.get_charset())
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        # set_charset() also accepts a plain charset name.
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_get_charsets(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        # A failobj replaces the None entries.
        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_make_boundary(self):
        msg = MIMEMultipart('form-data')
        # Note that when the boundary gets created is an implementation
        # detail and might change.
        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
        # Trigger creation of boundary
        msg.as_string()
        self.assertEqual(msg.items()[0][1][:33],
                         'multipart/form-data; boundary="==')
        # XXX: there ought to be tests of the uniqueness of the boundary, too.

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        with openfile('msg_46.txt') as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, True, 0)
        gen.flatten(msg, False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_byte_message_rfc822_only(self):
        # Make sure new bytes header parser also passes this.
        with openfile('msg_46.txt') as fp:
            msgdata = fp.read().encode('ascii')
        parser = email.parser.BytesHeaderParser()
        msg = parser.parsebytes(msgdata)
        out = BytesIO()
        gen = email.generator.BytesGenerator(out)
        gen.flatten(msg)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        self.assertIsNone(msg.get_payload(decode=True))
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            # Item assignment appends a header and lookups return the first
            # match, so drop the previous value first; otherwise only
            # 'x-uuencode' would ever be exercised.
            del msg['content-transfer-encoding']
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_get_payload_n_raises_on_non_multipart(self):
        msg = Message()
        self.assertRaises(TypeError, msg.get_payload, 1)

    def test_decoded_generator(self):
        # DecodedGenerator output for msg_07.txt must match msg_17.txt.
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        self.assertIn('from', msg)
        self.assertIn('From', msg)
        self.assertIn('FROM', msg)
        self.assertIn('to', msg)
        self.assertIn('To', msg)
        self.assertIn('TO', msg)

    def test_as_string(self):
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        self.assertEqual(text, str(msg))
        # With unixfrom=True a leading "From " line precedes the same text.
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        self.assertTrue(lines[0].startswith('From '))
        self.assertEqual(text, NL.join(lines[1:]))

    def test_as_string_policy(self):
        # as_string(policy=...) must agree with a Generator using that policy.
        msg = self._msgobj('msg_01.txt')
        newpolicy = msg.policy.clone(linesep='\r\n')
        fullrepr = msg.as_string(policy=newpolicy)
        s = StringIO()
        g = Generator(s, policy=newpolicy)
        g.flatten(msg)
        self.assertEqual(fullrepr, s.getvalue())

    def test_as_bytes(self):
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            data = fp.read().encode('ascii')
        self.assertEqual(data, bytes(msg))
        fullrepr = msg.as_bytes(unixfrom=True)
        lines = fullrepr.split(b'\n')
        self.assertTrue(lines[0].startswith(b'From '))
        self.assertEqual(data, b'\n'.join(lines[1:]))

    def test_as_bytes_policy(self):
        # as_bytes(policy=...) must agree with a BytesGenerator using it.
        msg = self._msgobj('msg_01.txt')
        newpolicy = msg.policy.clone(linesep='\r\n')
        fullrepr = msg.as_bytes(policy=newpolicy)
        s = BytesIO()
        g = BytesGenerator(s, policy=newpolicy)
        g.flatten(msg)
        self.assertEqual(fullrepr, s.getvalue())

    # test_headerregistry.TestContentTypeHeader.bad_params
    def test_bad_param(self):
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_filename())

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_boundary())

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        # No Content-Type header at all.
        self.assertIsNone(msg.get_params())
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    # test_headerregistry.TestContentTypeHeader.spaces_around_param_equals
    def test_get_param_liberal(self):
        msg = Message()
        msg['Content-Type'] = (
            'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"')
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        self.assertIsNone(msg.get_param('quuz', header='x-header'))
        self.assertIsNone(msg.get_param('quuz'))
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    # test_headerregistry.TestContentTypeHeader.spaces_around_semis
    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    # test_headerregistry.TestContentTypeHeader.semis_inside_quotes
    def test_get_param_with_semis_in_quotes(self):
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    # test_headerregistry.TestContentTypeHeader.quotes_inside_rfc2231_value
    def test_get_param_with_quotes(self):
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        # Header membership tests are case-insensitive but exact on the name.
        msg = email.message_from_string('Header: exists')
        self.assertIn('header', msg)
        self.assertIn('Header', msg)
        self.assertIn('HEADER', msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        # Re-adding the parameter puts it at the end of the list.
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_del_param_on_nonexistent_header(self):
        msg = Message()
        # Deleting param on empty msg should not raise exception.
        msg.del_param('filename', 'content-disposition')

    def test_del_nonexistent_param(self):
        msg = Message()
        msg.add_header('Content-Type', 'text/plain', charset='utf-8')
        existing_header = msg['Content-Type']
        msg.del_param('foobar', header='Content-Type')
        self.assertEqual(msg['Content-Type'], existing_header)

    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        # A type without a subtype is rejected.
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # Changing the type keeps the existing parameters.
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        # A Content-Type without a slash falls back to text/plain.
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        # A Content-Type without a slash falls back to text/plain.
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        # With duplicates, only the first occurrence is replaced.
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    # test_defect_handling:test_invalid_chars_in_base64_payload
    def test_broken_base64_payload(self):
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         (b'\x03\x00\xe9\xd0\xfe\xff\xff.\x8b\xc0'
                          b'\xa1\x00p\xf6\xbf\xe9\x0f'))
        self.assertIsInstance(msg.defects[0],
                              errors.InvalidBase64CharactersDefect)

    def test_broken_unicode_payload(self):
        # This test improves coverage but is not a compliance test.
        # The behavior in this situation is currently undefined by the API.
        x = 'this is a br\xf6ken thing to do'
        msg = Message()
        msg['content-type'] = 'text/plain'
        msg['content-transfer-encoding'] = '8bit'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    def test_questionable_bytes_payload(self):
        # This test improves coverage but is not a compliance test,
        # since it involves poking inside the black box.
        x = 'this is a quéstionable thing to do'.encode('utf-8')
        msg = Message()
        msg['content-type'] = 'text/plain; charset="utf-8"'
        msg['content-transfer-encoding'] = '8bit'
        msg._payload = x
        self.assertEqual(msg.get_payload(decode=True), x)

    # Issue 1078919
    def test_ascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    def test_noascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer.ppt")
        self.assertEqual(
            'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
            msg['Content-Disposition'])

    def test_nonascii_add_header_via_triple(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fußballer.ppt'))
        self.assertEqual(
            'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
            msg['Content-Disposition'])

    def test_ascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="windows [filename].ppt")
        self.assertEqual(
            'attachment; filename="windows [filename].ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer [filename].ppt")
        self.assertEqual(
            "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
            msg['Content-Disposition'])

    def test_binary_quopri_payload(self):
        for charset in ('latin-1', 'ascii'):
            msg = Message()
            msg['content-type'] = 'text/plain; charset=%s' % charset
            msg['content-transfer-encoding'] = 'quoted-printable'
            msg.set_payload(b'foo=e6=96=87bar')
            self.assertEqual(
                msg.get_payload(decode=True),
                b'foo\xe6\x96\x87bar',
                'get_payload returns wrong result with charset %s.' % charset)

    def test_binary_base64_payload(self):
        for charset in ('latin-1', 'ascii'):
            msg = Message()
            msg['content-type'] = 'text/plain; charset=%s' % charset
            msg['content-transfer-encoding'] = 'base64'
            msg.set_payload(b'Zm9v5paHYmFy')
            self.assertEqual(
                msg.get_payload(decode=True),
                b'foo\xe6\x96\x87bar',
                'get_payload returns wrong result with charset %s.' % charset)

    def test_binary_uuencode_payload(self):
        for charset in ('latin-1', 'ascii'):
            for encoding in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
                msg = Message()
                msg['content-type'] = 'text/plain; charset=%s' % charset
                msg['content-transfer-encoding'] = encoding
                msg.set_payload(b"begin 666 -\n)9F]OYI:'8F%R\n \nend\n")
                # The failure message is one concatenated literal; wrapping
                # the pieces in str((a, b)) would print a tuple repr instead.
                self.assertEqual(
                    msg.get_payload(decode=True),
                    b'foo\xe6\x96\x87bar',
                    ('get_payload returns wrong result '
                     'with charset {0} and encoding {1}.').format(
                         charset, encoding))

    def test_add_header_with_name_only_param(self):
        # A None parameter value yields a bare name; '_' becomes '-'.
        msg = Message()
        msg.add_header('Content-Disposition', 'inline', foo_bar=None)
        self.assertEqual("inline; foo-bar", msg['Content-Disposition'])

    def test_add_header_with_no_value(self):
        msg = Message()
        msg.add_header('X-Status', None)
        self.assertEqual('', msg['X-Status'])

    # Issue 5871: reject an attempt to embed a header inside a header value
    # (header injection attack).
    def test_embeded_header_via_Header_rejected(self):
        msg = Message()
        msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_embeded_header_via_string_rejected(self):
        msg = Message()
        msg['Dummy'] = 'dummy\nX-Injected-Header: test'
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_unicode_header_defaults_to_utf8_encoding(self):
        # Issue 14291
        m = MIMEText('abc\n')
        m['Subject'] = 'É test'
        self.assertEqual(str(m), textwrap.dedent("""\
            Content-Type: text/plain; charset="us-ascii"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 7bit
            Subject: =?utf-8?q?=C3=89_test?=

            abc
            """))

    def test_unicode_body_defaults_to_utf8_encoding(self):
        # Issue 14291
        m = MIMEText('É testabc\n')
        self.assertEqual(str(m), textwrap.dedent("""\
            Content-Type: text/plain; charset="utf-8"
            MIME-Version: 1.0
            Content-Transfer-Encoding: base64

            w4kgdGVzdGFiYwo=
            """))
704
705
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000706# Test the email.encoders module
707class TestEncoders(unittest.TestCase):
R David Murray6d94bd42011-03-16 15:52:22 -0400708
709 def test_EncodersEncode_base64(self):
710 with openfile('PyBanner048.gif', 'rb') as fp:
711 bindata = fp.read()
712 mimed = email.mime.image.MIMEImage(bindata)
713 base64ed = mimed.get_payload()
714 # the transfer-encoded body lines should all be <=76 characters
715 lines = base64ed.split('\n')
716 self.assertLessEqual(max([ len(x) for x in lines ]), 76)
717
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000718 def test_encode_empty_payload(self):
719 eq = self.assertEqual
720 msg = Message()
721 msg.set_charset('us-ascii')
722 eq(msg['content-transfer-encoding'], '7bit')
723
724 def test_default_cte(self):
725 eq = self.assertEqual
Ezio Melottic303c122010-04-22 11:57:12 +0000726 # 7bit data and the default us-ascii _charset
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000727 msg = MIMEText('hello world')
728 eq(msg['content-transfer-encoding'], '7bit')
Ezio Melottic303c122010-04-22 11:57:12 +0000729 # Similar, but with 8bit data
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000730 msg = MIMEText('hello \xf8 world')
R David Murray8680bcc2012-03-22 22:17:51 -0400731 eq(msg['content-transfer-encoding'], 'base64')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000732 # And now with a different charset
733 msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
734 eq(msg['content-transfer-encoding'], 'quoted-printable')
735
R. David Murraye85200d2010-05-06 01:41:14 +0000736 def test_encode7or8bit(self):
737 # Make sure a charset whose input character set is 8bit but
738 # whose output character set is 7bit gets a transfer-encoding
739 # of 7bit.
740 eq = self.assertEqual
R. David Murray850fc852010-06-03 01:58:28 +0000741 msg = MIMEText('文', _charset='euc-jp')
R. David Murraye85200d2010-05-06 01:41:14 +0000742 eq(msg['content-transfer-encoding'], '7bit')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000743
R David Murrayf581b372013-02-05 10:49:49 -0500744 def test_qp_encode_latin1(self):
745 msg = MIMEText('\xe1\xf6\n', 'text', 'ISO-8859-1')
746 self.assertEqual(str(msg), textwrap.dedent("""\
747 MIME-Version: 1.0
748 Content-Type: text/text; charset="iso-8859-1"
749 Content-Transfer-Encoding: quoted-printable
750
751 =E1=F6
752 """))
753
754 def test_qp_encode_non_latin1(self):
755 # Issue 16948
756 msg = MIMEText('\u017c\n', 'text', 'ISO-8859-2')
757 self.assertEqual(str(msg), textwrap.dedent("""\
758 MIME-Version: 1.0
759 Content-Type: text/text; charset="iso-8859-2"
760 Content-Transfer-Encoding: quoted-printable
761
762 =BF
763 """))
764
Ezio Melottib3aedd42010-11-20 19:04:17 +0000765
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000766# Test long header wrapping
class TestLongHeaders(TestEmailBase):
    # Folding and RFC 2047 encoding of header values that do not fit on one
    # line, exercised through Header.encode(), Generator.flatten() and
    # Message.as_string().  Whitespace inside the literals below is
    # significant: the expected values are compared character for character.

    maxDiff = None

    def test_split_long_continuation(self):
        # A parsed header with an unsplittable continuation line is written
        # back out unchanged.
        eq = self.ndiffAssertEqual
        msg = email.message_from_string("""\
Subject: bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text

test
""")
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), """\
Subject: bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text

test
""")

    def test_another_long_almost_unsplittable_header(self):
        # Same value as above, encoded directly, once with tab and once with
        # space continuation whitespace.
        eq = self.ndiffAssertEqual
        hstr = """\
bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text"""
        h = Header(hstr, continuation_ws='\t')
        eq(h.encode(), """\
bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text""")
        h = Header(hstr.replace('\t', ' '))
        eq(h.encode(), """\
bug demonstration
 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
 more text""")

    def test_long_nonstring(self):
        # A header assembled from three chunks in three different charsets.
        eq = self.ndiffAssertEqual
        g = Charset("iso-8859-1")
        cz = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
                  b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
                  b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
                  b'bef\xf6rdert. ')
        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        h = Header(g_head, g, header_name='Subject')
        h.append(cz_head, cz)
        h.append(utf8_head, utf8)
        msg = Message()
        msg['Subject'] = h
        sfp = StringIO()
        # Note: from here on ``g`` is the Generator, not the Charset above.
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), """\
Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
 =?utf-8?b?44CC?=

""")
        eq(h.encode(maxlinelen=76), """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")

    def test_long_header_encode(self):
        eq = self.ndiffAssertEqual
        h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
                   'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
                   header_name='X-Foobar-Spoink-Defrobnit')
        eq(h.encode(), '''\
wasnipoop; giraffes="very-long-necked-animals";
 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')

    def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
        # continuation_ws='\t' does not replace the space already present at
        # the fold point.
        eq = self.ndiffAssertEqual
        h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
                   'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
                   header_name='X-Foobar-Spoink-Defrobnit',
                   continuation_ws='\t')
        eq(h.encode(), '''\
wasnipoop; giraffes="very-long-necked-animals";
 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')

    def test_long_header_encode_with_tab_continuation(self):
        # Here the value itself contains the tab at the fold point.
        eq = self.ndiffAssertEqual
        h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
                   'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
                   header_name='X-Foobar-Spoink-Defrobnit',
                   continuation_ws='\t')
        eq(h.encode(), '''\
wasnipoop; giraffes="very-long-necked-animals";
\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')

    def test_header_encode_with_different_output_charset(self):
        h = Header('文', 'euc-jp')
        self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=")

    def test_long_header_encode_with_different_output_charset(self):
        h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4'
            b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4'
            b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4'
            b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp')
        res = """\
=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?=
 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?="""
        self.assertEqual(h.encode(), res)

    def test_header_splitter(self):
        eq = self.ndiffAssertEqual
        msg = MIMEText('')
        # It'd be great if we could use add_header() here, but that doesn't
        # guarantee an order of the parameters.
        msg['X-Foobar-Spoink-Defrobnit'] = (
            'wasnipoop; giraffes="very-long-necked-animals"; '
            'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), '''\
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"

''')

    def test_no_semis_header_splitter(self):
        eq = self.ndiffAssertEqual
        msg = Message()
        msg['From'] = 'test@dom.ain'
        msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
        msg.set_payload('Test')
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), """\
From: test@dom.ain
References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>

Test""")

    def test_last_split_chunk_does_not_fit(self):
        eq = self.ndiffAssertEqual
        h = Header('Subject: the first part of this is short, but_the_second'
            '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
            '_all_by_itself')
        eq(h.encode(), """\
Subject: the first part of this is short,
 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")

    def test_splittable_leading_char_followed_by_overlong_unsplitable(self):
        eq = self.ndiffAssertEqual
        h = Header(', but_the_second'
            '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
            '_all_by_itself')
        eq(h.encode(), """\
,
 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")

    def test_multiple_splittable_leading_char_followed_by_overlong_unsplitable(self):
        eq = self.ndiffAssertEqual
        h = Header(', , but_the_second'
            '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
            '_all_by_itself')
        eq(h.encode(), """\
, ,
 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")

    def test_trailing_splitable_on_overlong_unsplitable(self):
        eq = self.ndiffAssertEqual
        h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
            'be_on_a_line_all_by_itself;')
        eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_should_"
            "be_on_a_line_all_by_itself;")

    def test_trailing_splitable_on_overlong_unsplitable_with_leading_splitable(self):
        eq = self.ndiffAssertEqual
        h = Header('; '
            'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
            'be_on_a_line_all_by_itself; ')
        eq(h.encode(), """\
;
 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)

    def test_long_header_with_multiple_sequential_split_chars(self):
        # The doubled spaces are the point of this test: runs of split
        # characters must survive folding instead of truncating the header.
        eq = self.ndiffAssertEqual
        h = Header('This is a long line that has two whitespaces  in a row.  '
            'This used to cause truncation of the header when folded')
        eq(h.encode(), """\
This is a long line that has two whitespaces  in a row.  This used to cause
 truncation of the header when folded""")

    def test_splitter_split_on_punctuation_only_if_fws_with_header(self):
        # Punctuation without following whitespace is not a fold point.
        eq = self.ndiffAssertEqual
        h = Header('thisverylongheaderhas;semicolons;and,commas,but'
            'they;arenotlegal;fold,points')
        eq(h.encode(), "thisverylongheaderhas;semicolons;and,commas,butthey;"
                        "arenotlegal;fold,points")

    def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self):
        eq = self.ndiffAssertEqual
        h = Header('this is a test where we need to have more than one line '
            'before; our final line that is just too big to fit;; '
            'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
            'be_on_a_line_all_by_itself;')
        eq(h.encode(), """\
this is a test where we need to have more than one line before;
 our final line that is just too big to fit;;
 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""")

    def test_overlong_last_part_followed_by_split_point(self):
        eq = self.ndiffAssertEqual
        h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
            'be_on_a_line_all_by_itself ')
        eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_"
                        "should_be_on_a_line_all_by_itself ")

    def test_multiline_with_overlong_parts_separated_by_two_split_points(self):
        eq = self.ndiffAssertEqual
        h = Header('this_is_a__test_where_we_need_to_have_more_than_one_line_'
            'before_our_final_line_; ; '
            'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
            'be_on_a_line_all_by_itself; ')
        eq(h.encode(), """\
this_is_a__test_where_we_need_to_have_more_than_one_line_before_our_final_line_;
 ;
 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)

    def test_multiline_with_overlong_last_part_followed_by_split_point(self):
        eq = self.ndiffAssertEqual
        h = Header('this is a test where we need to have more than one line '
            'before our final line; ; '
            'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
            'be_on_a_line_all_by_itself; ')
        eq(h.encode(), """\
this is a test where we need to have more than one line before our final line;
 ;
 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)

    def test_long_header_with_whitespace_runs(self):
        # Each element ends in two spaces and SPACE.join() adds a third, so
        # the message ids are separated by three-space runs and the value
        # ends in two spaces (the \x20\x20 below).
        eq = self.ndiffAssertEqual
        msg = Message()
        msg['From'] = 'test@dom.ain'
        msg['References'] = SPACE.join(['<foo@dom.ain>  '] * 10)
        msg.set_payload('Test')
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), """\
From: test@dom.ain
References: <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>
   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>
   <foo@dom.ain>   <foo@dom.ain>\x20\x20

Test""")

    def test_long_run_with_semi_header_splitter(self):
        eq = self.ndiffAssertEqual
        msg = Message()
        msg['From'] = 'test@dom.ain'
        msg['References'] = SPACE.join(['<foo@dom.ain>'] * 10) + '; abc'
        msg.set_payload('Test')
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), """\
From: test@dom.ain
References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
 <foo@dom.ain>; abc

Test""")

    def test_splitter_split_on_punctuation_only_if_fws(self):
        eq = self.ndiffAssertEqual
        msg = Message()
        msg['From'] = 'test@dom.ain'
        msg['References'] = ('thisverylongheaderhas;semicolons;and,commas,but'
            'they;arenotlegal;fold,points')
        msg.set_payload('Test')
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        # XXX the space after the header should not be there.
        eq(sfp.getvalue(), """\
From: test@dom.ain
References:\x20
 thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points

Test""")

    def test_no_split_long_header(self):
        eq = self.ndiffAssertEqual
        hstr = 'References: ' + 'x' * 80
        h = Header(hstr)
        # These come on two lines because Headers are really field value
        # classes and don't really know about their field names.
        eq(h.encode(), 'References:\n ' + 'x' * 80)
        h = Header('x' * 80)
        eq(h.encode(), 'x' * 80)

    def test_splitting_multiple_long_lines(self):
        eq = self.ndiffAssertEqual
        hstr = """\
from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
"""
        h = Header(hstr, continuation_ws='\t')
        eq(h.encode(), """\
from babylon.socal-raves.org (localhost [127.0.0.1]);
 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
 for <mailman-admin@babylon.socal-raves.org>;
 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
 for <mailman-admin@babylon.socal-raves.org>;
 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
 for <mailman-admin@babylon.socal-raves.org>;
 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")

    def test_splitting_first_line_only_is_long(self):
        eq = self.ndiffAssertEqual
        hstr = """\
from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
\tid 17k4h5-00034i-00
\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
        h = Header(hstr, maxlinelen=78, header_name='Received',
                   continuation_ws='\t')
        eq(h.encode(), """\
from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
 helo=cthulhu.gerg.ca)
\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
\tid 17k4h5-00034i-00
\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")

    def test_long_8bit_header(self):
        eq = self.ndiffAssertEqual
        msg = Message()
        h = Header('Britische Regierung gibt', 'iso-8859-1',
                    header_name='Subject')
        h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
        eq(h.encode(maxlinelen=76), """\
=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
        msg['Subject'] = h
        eq(msg.as_string(maxheaderlen=76), """\
Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
 =?iso-8859-1?q?hore-Windkraftprojekte?=

""")
        # maxheaderlen=0 disables folding altogether.
        eq(msg.as_string(maxheaderlen=0), """\
Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=

""")

    def test_long_8bit_header_no_charset(self):
        # The same non-ASCII value assigned as a plain str and as a Header
        # without an explicit charset must serialize identically.
        eq = self.ndiffAssertEqual
        msg = Message()
        header_string = ('Britische Regierung gibt gr\xfcnes Licht '
                         'f\xfcr Offshore-Windkraftprojekte '
                         '<a-very-long-address@example.com>')
        msg['Reply-To'] = header_string
        eq(msg.as_string(maxheaderlen=78), """\
Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=

""")
        msg = Message()
        msg['Reply-To'] = Header(header_string,
                                 header_name='Reply-To')
        eq(msg.as_string(maxheaderlen=78), """\
Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=

""")

    def test_long_to_header(self):
        eq = self.ndiffAssertEqual
        to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
              '<someone@eecs.umich.edu>, '
              '"Someone Test #B" <someone@umich.edu>, '
              '"Someone Test #C" <someone@eecs.umich.edu>, '
              '"Someone Test #D" <someone@eecs.umich.edu>')
        msg = Message()
        msg['To'] = to
        eq(msg.as_string(maxheaderlen=78), '''\
To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
 "Someone Test #B" <someone@umich.edu>,
 "Someone Test #C" <someone@eecs.umich.edu>,
 "Someone Test #D" <someone@eecs.umich.edu>

''')

    def test_long_line_after_append(self):
        eq = self.ndiffAssertEqual
        s = 'This is an example of string which has almost the limit of header length.'
        h = Header(s)
        h.append('Add another line.')
        eq(h.encode(maxlinelen=76), """\
This is an example of string which has almost the limit of header length.
 Add another line.""")

    def test_shorter_line_with_append(self):
        eq = self.ndiffAssertEqual
        s = 'This is a shorter line.'
        h = Header(s)
        h.append('Add another sentence. (Surprise?)')
        eq(h.encode(),
           'This is a shorter line. Add another sentence. (Surprise?)')

    def test_long_field_name(self):
        eq = self.ndiffAssertEqual
        fn = 'X-Very-Very-Very-Long-Header-Name'
        gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
              'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
              'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
              'bef\xf6rdert. ')
        h = Header(gs, 'iso-8859-1', header_name=fn)
        # BAW: this seems broken because the first line is too long
        eq(h.encode(maxlinelen=76), """\
=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")

    def test_long_received_header(self):
        h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
             'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
             'Wed, 05 Mar 2003 18:10:18 -0700')
        msg = Message()
        msg['Received-1'] = Header(h, continuation_ws='\t')
        msg['Received-2'] = h
        # This should be splitting on spaces not semicolons.
        self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
 Wed, 05 Mar 2003 18:10:18 -0700
Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
 Wed, 05 Mar 2003 18:10:18 -0700

""")

    def test_string_headerinst_eq(self):
        # A str value and a Header instance with the same text must fold
        # the same way.
        h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
             'tu-muenchen.de> (David Bremner\'s message of '
             '"Thu, 6 Mar 2003 13:58:21 +0100")')
        msg = Message()
        msg['Received-1'] = Header(h, header_name='Received-1',
                                   continuation_ws='\t')
        msg['Received-2'] = h
        # XXX The space after the ':' should not be there.
        self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Received-1:\x20
 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
Received-2:\x20
 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")

""")

    def test_long_unbreakable_lines_with_continuation(self):
        eq = self.ndiffAssertEqual
        msg = Message()
        t = """\
iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
        msg['Face-1'] = t
        msg['Face-2'] = Header(t, header_name='Face-2')
        msg['Face-3'] = ' ' + t
        # XXX This splitting is all wrong.  The first value line should be
        # snug against the field name or the space after the header not there.
        eq(msg.as_string(maxheaderlen=78), """\
Face-1:\x20
 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Face-2:\x20
 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Face-3:\x20
 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp

""")

    def test_another_long_multiline_header(self):
        eq = self.ndiffAssertEqual
        m = ('Received: from siimage.com '
             '([172.25.1.3]) by zima.siliconimage.com with '
             'Microsoft SMTPSVC(5.0.2195.4905); '
             'Wed, 16 Oct 2002 07:41:11 -0700')
        msg = email.message_from_string(m)
        eq(msg.as_string(maxheaderlen=78), '''\
Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with
 Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 2002 07:41:11 -0700

''')

    def test_long_lines_with_different_header(self):
        # The value itself looks like a header line; it is added twice under
        # the field name 'List', once as str and once as Header.
        eq = self.ndiffAssertEqual
        h = ('List-Unsubscribe: '
             '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
             ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
             '?subject=unsubscribe>')
        msg = Message()
        msg['List'] = h
        msg['List'] = Header(h, header_name='List')
        eq(msg.as_string(maxheaderlen=78), """\
List: List-Unsubscribe:
 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
List: List-Unsubscribe:
 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>

""")

    def test_long_rfc2047_header_with_embedded_fws(self):
        h = Header(textwrap.dedent("""\
            We're going to pretend this header is in a non-ascii character set
            \tto see if line wrapping with encoded words and embedded
            folding white space works"""),
                   charset='utf-8',
                   header_name='Test')
        # Every folded line after the first starts with one space of
        # continuation whitespace, hence the extra column of indentation on
        # the last three lines of the expected value.
        self.assertEqual(h.encode()+'\n', textwrap.dedent("""\
            =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
             =?utf-8?q?cter_set?=
             =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
             =?utf-8?q?_folding_white_space_works?=""")+'\n')
1336
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001337
Ezio Melottib3aedd42010-11-20 19:04:17 +00001338
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001339# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    # Body lines that begin with "From " are escaped as ">From " only when
    # the generator is created with mangle_from_=True.

    def setUp(self):
        message = Message()
        message['From'] = 'aaa@bbb.org'
        message.set_payload(
            'From the desk of A.A.A.:\n'
            'Blah blah blah\n')
        self.msg = message

    def _flattened(self, mangle):
        # Serialize self.msg with a text Generator and return the result.
        out = StringIO()
        Generator(out, mangle_from_=mangle).flatten(self.msg)
        return out.getvalue()

    def test_mangled_from(self):
        self.assertEqual(
            self._flattened(True),
            'From: aaa@bbb.org\n'
            '\n'
            '>From the desk of A.A.A.:\n'
            'Blah blah blah\n')

    def test_dont_mangle_from(self):
        self.assertEqual(
            self._flattened(False),
            'From: aaa@bbb.org\n'
            '\n'
            'From the desk of A.A.A.:\n'
            'Blah blah blah\n')

    def test_mangle_from_in_preamble_and_epilog(self):
        # Both the preamble and the epilogue of a multipart message contain
        # one "From " line; each must come out mangled.
        multipart = email.message_from_string(textwrap.dedent("""\
            From: foo@bar.com
            Mime-Version: 1.0
            Content-Type: multipart/mixed; boundary=XXX

            From somewhere unknown

            --XXX
            Content-Type: text/plain

            foo

            --XXX--

            From somewhere unknowable
            """))
        out = StringIO()
        Generator(out, mangle_from_=True).flatten(multipart)
        mangled = sum(line.startswith('>From ')
                      for line in out.getvalue().split('\n'))
        self.assertEqual(mangled, 2)

    def test_mangled_from_with_bad_bytes(self):
        headers = textwrap.dedent("""\
            Content-Type: text/plain; charset="utf-8"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 8bit
            From: aaa@bbb.org

            """).encode('utf-8')
        parsed = email.message_from_bytes(headers + b'From R\xc3\xb6lli\n')
        out = BytesIO()
        BytesGenerator(out, mangle_from_=True).flatten(parsed)
        self.assertEqual(out.getvalue(), headers + b'>From R\xc3\xb6lli\n')
1407
Ezio Melottib3aedd42010-11-20 19:04:17 +00001408
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001409# Test the basic MIMEAudio class
1410class TestMIMEAudio(unittest.TestCase):
1411 def setUp(self):
R David Murray28346b82011-03-31 11:40:20 -04001412 with openfile('audiotest.au', 'rb') as fp:
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001413 self._audiodata = fp.read()
1414 self._au = MIMEAudio(self._audiodata)
1415
1416 def test_guess_minor_type(self):
1417 self.assertEqual(self._au.get_content_type(), 'audio/basic')
1418
1419 def test_encoding(self):
1420 payload = self._au.get_payload()
R. David Murray7da8f062010-06-04 16:11:08 +00001421 self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
1422 self._audiodata)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001423
1424 def test_checkSetMinor(self):
1425 au = MIMEAudio(self._audiodata, 'fish')
1426 self.assertEqual(au.get_content_type(), 'audio/fish')
1427
1428 def test_add_header(self):
1429 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001430 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001431 self._au.add_header('Content-Disposition', 'attachment',
1432 filename='audiotest.au')
1433 eq(self._au['content-disposition'],
1434 'attachment; filename="audiotest.au"')
1435 eq(self._au.get_params(header='content-disposition'),
1436 [('attachment', ''), ('filename', 'audiotest.au')])
1437 eq(self._au.get_param('filename', header='content-disposition'),
1438 'audiotest.au')
1439 missing = []
1440 eq(self._au.get_param('attachment', header='content-disposition'), '')
1441 unless(self._au.get_param('foo', failobj=missing,
1442 header='content-disposition') is missing)
1443 # Try some missing stuff
1444 unless(self._au.get_param('foobar', missing) is missing)
1445 unless(self._au.get_param('attachment', missing,
1446 header='foobar') is missing)
1447
1448
Ezio Melottib3aedd42010-11-20 19:04:17 +00001449
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001450# Test the basic MIMEImage class
1451class TestMIMEImage(unittest.TestCase):
1452 def setUp(self):
1453 with openfile('PyBanner048.gif', 'rb') as fp:
1454 self._imgdata = fp.read()
1455 self._im = MIMEImage(self._imgdata)
1456
1457 def test_guess_minor_type(self):
1458 self.assertEqual(self._im.get_content_type(), 'image/gif')
1459
1460 def test_encoding(self):
1461 payload = self._im.get_payload()
R. David Murray7da8f062010-06-04 16:11:08 +00001462 self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
1463 self._imgdata)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001464
1465 def test_checkSetMinor(self):
1466 im = MIMEImage(self._imgdata, 'fish')
1467 self.assertEqual(im.get_content_type(), 'image/fish')
1468
1469 def test_add_header(self):
1470 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001471 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001472 self._im.add_header('Content-Disposition', 'attachment',
1473 filename='dingusfish.gif')
1474 eq(self._im['content-disposition'],
1475 'attachment; filename="dingusfish.gif"')
1476 eq(self._im.get_params(header='content-disposition'),
1477 [('attachment', ''), ('filename', 'dingusfish.gif')])
1478 eq(self._im.get_param('filename', header='content-disposition'),
1479 'dingusfish.gif')
1480 missing = []
1481 eq(self._im.get_param('attachment', header='content-disposition'), '')
1482 unless(self._im.get_param('foo', failobj=missing,
1483 header='content-disposition') is missing)
1484 # Try some missing stuff
1485 unless(self._im.get_param('foobar', missing) is missing)
1486 unless(self._im.get_param('attachment', missing,
1487 header='foobar') is missing)
1488
1489
Ezio Melottib3aedd42010-11-20 19:04:17 +00001490
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001491# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    """Tests for MIMEApplication with the available payload encoders."""

    @staticmethod
    def _roundtrip(message):
        # Flatten to wire bytes with BytesGenerator and parse them back.
        buf = BytesIO()
        BytesGenerator(buf).flatten(message)
        return email.message_from_bytes(buf.getvalue())

    def test_headers(self):
        msg = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        self.assertEqual(msg.get_content_type(), 'application/octet-stream')
        self.assertEqual(msg['content-transfer-encoding'], 'base64')

    def test_body(self):
        raw = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(raw)
        # whitespace in the cte encoded block is RFC-irrelevant.
        self.assertEqual(msg.get_payload().strip(), '+vv8/f7/')
        self.assertEqual(msg.get_payload(decode=True), raw)

    def test_binary_body_with_encode_7or8bit(self):
        # Issue 17171.
        raw = b'\xfa\xfb\xfc\xfd\xfe\xff'
        as_text = '\uFFFD' * len(raw)
        msg = MIMEApplication(raw, _encoder=encoders.encode_7or8bit)
        # Treated as a string, this will be invalid code points.
        self.assertEqual(msg.get_payload(), as_text)
        self.assertEqual(msg.get_payload(decode=True), raw)
        self.assertEqual(msg['Content-Transfer-Encoding'], '8bit')
        reparsed = self._roundtrip(msg)
        # NOTE(review): this re-checks the original message, not the
        # re-parsed one -- possibly meant to be the latter; confirm.
        self.assertEqual(msg.get_payload(), as_text)
        self.assertEqual(reparsed.get_payload(decode=True), raw)
        self.assertEqual(reparsed['Content-Transfer-Encoding'], '8bit')

    def test_binary_body_with_encode_noop(self):
        # Issue 16564: This does not produce an RFC valid message, since to be
        # valid it should have a CTE of binary.  But the below works in
        # Python2, and is documented as working this way.
        raw = b'\xfa\xfb\xfc\xfd\xfe\xff'
        as_text = '\uFFFD' * len(raw)
        msg = MIMEApplication(raw, _encoder=encoders.encode_noop)
        # Treated as a string, this will be invalid code points.
        self.assertEqual(msg.get_payload(), as_text)
        self.assertEqual(msg.get_payload(decode=True), raw)
        reparsed = self._roundtrip(msg)
        # NOTE(review): re-checks the original message, as above; confirm.
        self.assertEqual(msg.get_payload(), as_text)
        self.assertEqual(reparsed.get_payload(decode=True), raw)

    def test_binary_body_with_encode_quopri(self):
        # Issue 14360.
        raw = b'\xfa\xfb\xfc\xfd\xfe\xff '
        as_text = '=FA=FB=FC=FD=FE=FF=20'
        msg = MIMEApplication(raw, _encoder=encoders.encode_quopri)
        self.assertEqual(msg.get_payload(), as_text)
        self.assertEqual(msg.get_payload(decode=True), raw)
        self.assertEqual(msg['Content-Transfer-Encoding'], 'quoted-printable')
        reparsed = self._roundtrip(msg)
        # NOTE(review): re-checks the original message, as above; confirm.
        self.assertEqual(msg.get_payload(), as_text)
        self.assertEqual(reparsed.get_payload(decode=True), raw)
        self.assertEqual(reparsed['Content-Transfer-Encoding'],
                         'quoted-printable')

    def test_binary_body_with_encode_base64(self):
        raw = b'\xfa\xfb\xfc\xfd\xfe\xff'
        as_text = '+vv8/f7/\n'
        msg = MIMEApplication(raw, _encoder=encoders.encode_base64)
        self.assertEqual(msg.get_payload(), as_text)
        self.assertEqual(msg.get_payload(decode=True), raw)
        reparsed = self._roundtrip(msg)
        # NOTE(review): re-checks the original message, as above; confirm.
        self.assertEqual(msg.get_payload(), as_text)
        self.assertEqual(reparsed.get_payload(decode=True), raw)
1569
Ezio Melottib3aedd42010-11-20 19:04:17 +00001570
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001571# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    """Tests for MIMEText content type, charset and payload handling."""

    def setUp(self):
        self._msg = MIMEText('hello there')

    def test_types(self):
        eq = self.assertEqual
        eq(self._msg.get_content_type(), 'text/plain')
        eq(self._msg.get_param('charset'), 'us-ascii')
        # A unique sentinel: identity proves failobj itself was returned.
        missing = []
        self.assertIs(self._msg.get_param('foobar', missing), missing)
        self.assertIs(
            self._msg.get_param('charset', missing, header='foobar'),
            missing)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there')
        eq(msg.get_charset(), 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        self.assertIn('hello there', msg.as_string())

    def test_utf8_input(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        eq = self.assertEqual
        msg = MIMEText(teststr, _charset='utf-8')
        eq(msg.get_charset().output_charset, 'utf-8')
        eq(msg['content-type'], 'text/plain; charset="utf-8"')
        eq(msg.get_payload(decode=True), teststr.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
        "will fix in email6")
    def test_utf8_input_no_charset(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
1622
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001623
Ezio Melottib3aedd42010-11-20 19:04:17 +00001624
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001625# Test complicated multipart/* messages
class TestMultipart(TestEmailBase):
    """Tests for building, flattening and parsing multipart/* messages."""

    def setUp(self):
        # Build a multipart/mixed container with a text part and a GIF
        # attachment; the three objects are kept for the tests to inspect.
        with openfile('PyBanner048.gif', 'rb') as fp:
            data = fp.read()
        container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
        image = MIMEImage(data, name='dingusfish.gif')
        image.add_header('content-disposition', 'attachment',
                         filename='dingusfish.gif')
        intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
        container.attach(intro)
        container.attach(image)
        container['From'] = 'Barry <barry@digicool.com>'
        container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
        container['Subject'] = 'Here is your dingus fish'

        now = 987809702.54848599
        timetuple = time.localtime(now)
        # Last struct_time field is the DST flag: pick the matching offset.
        if timetuple[-1] == 0:
            tzsecs = time.timezone
        else:
            tzsecs = time.altzone
        # time.timezone/altzone count seconds *west* of UTC, so a positive
        # value is a negative UTC offset.
        sign = '-' if tzsecs > 0 else '+'
        # Format as HHMM from the absolute offset.  The former
        # '%04d' % (tzsecs / 36) produced a second sign for zones east of
        # UTC and wrong digits for offsets that are not whole hours.
        hours, minutes = divmod(abs(tzsecs) // 60, 60)
        tzoffset = ' %s%02d%02d' % (sign, hours, minutes)
        container['Date'] = time.strftime(
            '%a, %d %b %Y %H:%M:%S', timetuple) + tzoffset
        self._msg = container
        self._im = image
        self._txt = intro

    def test_hierarchy(self):
        # convenience
        eq = self.assertEqual
        raises = self.assertRaises
        # tests
        m = self._msg
        self.assertTrue(m.is_multipart())
        eq(m.get_content_type(), 'multipart/mixed')
        eq(len(m.get_payload()), 2)
        raises(IndexError, m.get_payload, 2)
        m0 = m.get_payload(0)
        m1 = m.get_payload(1)
        self.assertIs(m0, self._txt)
        self.assertIs(m1, self._im)
        eq(m.get_payload(), [m0, m1])
        self.assertFalse(m0.is_multipart())
        self.assertFalse(m1.is_multipart())

    def test_empty_multipart_idempotent(self):
        text = """\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY


--BOUNDARY--
"""
        msg = Parser().parsestr(text)
        self.ndiffAssertEqual(text, msg.as_string())

    def test_no_parts_in_a_multipart_with_none_epilogue(self):
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY

--BOUNDARY--''')

    def test_no_parts_in_a_multipart_with_empty_epilogue(self):
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = ''
        outer.epilogue = ''
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY

--BOUNDARY--
''')

    def test_one_part_in_a_multipart(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.set_boundary('BOUNDARY')
        msg = MIMEText('hello world')
        outer.attach(msg)
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_preamble(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_preamble(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = '\n'
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--

''')

    def test_message_external_body(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_36.txt')
        eq(len(msg.get_payload()), 2)
        msg1 = msg.get_payload(1)
        eq(msg1.get_content_type(), 'multipart/alternative')
        eq(len(msg1.get_payload()), 2)
        for subpart in msg1.get_payload():
            eq(subpart.get_content_type(), 'message/external-body')
            eq(len(subpart.get_payload()), 1)
            subsubpart = subpart.get_payload(0)
            eq(subsubpart.get_content_type(), 'text/plain')

    def test_double_boundary(self):
        # msg_37.txt is a multipart that contains two dash-boundary's in a
        # row.  Our interpretation of RFC 2046 calls for ignoring the second
        # and subsequent boundaries.
        msg = self._msgobj('msg_37.txt')
        self.assertEqual(len(msg.get_payload()), 3)

    def test_nested_inner_contains_outer_boundary(self):
        eq = self.ndiffAssertEqual
        # msg_38.txt has an inner part that contains outer boundaries.  My
        # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
        # these are illegal and should be interpreted as unterminated inner
        # parts.
        msg = self._msgobj('msg_38.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): nesting below assumes 4 spaces per level as printed
        # by iterators._structure -- confirm against msg_38.txt.
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
            text/plain
        text/plain
    text/plain
    text/plain
""")

    def test_nested_with_same_boundary(self):
        eq = self.ndiffAssertEqual
        # msg 39.txt is similarly evil in that it's got inner parts that use
        # the same boundary as outer parts.  Again, I believe the way this is
        # parsed is closest to the spirit of RFC 2046
        msg = self._msgobj('msg_39.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): nesting below assumes 4 spaces per level as printed
        # by iterators._structure -- confirm against msg_39.txt.
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
        application/octet-stream
        application/octet-stream
    text/plain
""")

    def test_boundary_in_non_multipart(self):
        msg = self._msgobj('msg_40.txt')
        self.assertEqual(msg.as_string(), '''\
MIME-Version: 1.0
Content-Type: text/html; boundary="--961284236552522269"

----961284236552522269
Content-Type: text/html;
Content-Transfer-Encoding: 7Bit

<html></html>

----961284236552522269--
''')

    def test_boundary_with_leading_space(self):
        eq = self.assertEqual
        # NOTE(review): the amount of leading whitespace in the boundary is
        # assumed to be one space throughout -- confirm.
        msg = email.message_from_string('''\
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary=" XXXX"

-- XXXX
Content-Type: text/plain


-- XXXX
Content-Type: text/plain

-- XXXX--
''')
        self.assertTrue(msg.is_multipart())
        eq(msg.get_boundary(), ' XXXX')
        eq(len(msg.get_payload()), 2)

    def test_boundary_without_trailing_newline(self):
        m = Parser().parsestr("""\
Content-Type: multipart/mixed; boundary="===============0012394164=="
MIME-Version: 1.0

--===============0012394164==
Content-Type: image/file1.jpg
MIME-Version: 1.0
Content-Transfer-Encoding: base64

YXNkZg==
--===============0012394164==--""")
        self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001997
1998
Ezio Melottib3aedd42010-11-20 19:04:17 +00001999
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002000# Test some badly formatted messages
class TestNonConformant(TestEmailBase):
    """Parsing of malformed messages and the defects recorded for them."""

    def test_parse_missing_minor_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_14.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')

    # test_defect_handling
    def test_same_boundary_inner_outer(self):
        msg = self._msgobj('msg_15.txt')
        # XXX We can probably eventually do better
        inner = msg.get_payload(0)
        self.assertTrue(hasattr(inner, 'defects'))
        self.assertEqual(len(inner.defects), 1)
        self.assertIsInstance(inner.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    # test_defect_handling
    def test_multipart_no_boundary(self):
        msg = self._msgobj('msg_25.txt')
        self.assertIsInstance(msg.get_payload(), str)
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    # Template message; the '{}' slot takes optional extra header text
    # appended after the Content-Type header.
    # NOTE(review): the continuation line's indent is assumed to be 4 spaces
    # relative to the other lines -- confirm.
    multipart_msg = textwrap.dedent("""\
        Date: Wed, 14 Nov 2007 12:56:23 GMT
        From: foo@bar.invalid
        To: foo@bar.invalid
        Subject: Content-Transfer-Encoding: base64 and multipart
        MIME-Version: 1.0
        Content-Type: multipart/mixed;
            boundary="===============3344438784458119861=="{}

        --===============3344438784458119861==
        Content-Type: text/plain

        Test message

        --===============3344438784458119861==
        Content-Type: application/octet-stream
        Content-Transfer-Encoding: base64

        YWJj

        --===============3344438784458119861==--
        """)

    # test_defect_handling
    def test_multipart_invalid_cte(self):
        msg = self._str_msg(
            self.multipart_msg.format("\nContent-Transfer-Encoding: base64"))
        self.assertEqual(len(msg.defects), 1)
        self.assertIsInstance(msg.defects[0],
            errors.InvalidMultipartContentTransferEncodingDefect)

    # test_defect_handling
    def test_multipart_no_cte_no_defect(self):
        msg = self._str_msg(self.multipart_msg.format(''))
        self.assertEqual(len(msg.defects), 0)

    # test_defect_handling
    def test_multipart_valid_cte_no_defect(self):
        for cte in ('7bit', '8bit', 'BINary'):
            msg = self._str_msg(
                self.multipart_msg.format(
                    "\nContent-Transfer-Encoding: {}".format(cte)))
            self.assertEqual(len(msg.defects), 0)

    # test_headerregistry.TestContentTypeHeader invalid_1 and invalid_2.
    def test_invalid_content_type(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        msg = Message()
        # RFC 2045, $5.2 says invalid yields text/plain
        msg['Content-Type'] = 'text'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Clear the old value and try something /really/ invalid
        del msg['content-type']
        msg['Content-Type'] = 'foo'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Still, make sure that the message is idempotently generated
        s = StringIO()
        g = Generator(s)
        g.flatten(msg)
        neq(s.getvalue(), 'Content-Type: foo\n\n')

    def test_no_start_boundary(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_31.txt')
        eq(msg.get_payload(), """\
--BOUNDARY
Content-Type: text/plain

message 1

--BOUNDARY
Content-Type: text/plain

message 2

--BOUNDARY--
""")

    def test_no_separating_blank_line(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_35.txt')
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: here's something interesting

counter to RFC 2822, there's no separating newline here
""")

    # test_defect_handling
    def test_lying_multipart(self):
        msg = self._msgobj('msg_41.txt')
        self.assertTrue(hasattr(msg, 'defects'))
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    # test_defect_handling
    def test_missing_start_boundary(self):
        outer = self._msgobj('msg_42.txt')
        # The message structure is:
        #
        # multipart/mixed
        #    text/plain
        #    message/rfc822
        #        multipart/mixed [*]
        #
        # [*] This message is missing its start boundary
        bad = outer.get_payload(1).get_payload(0)
        self.assertEqual(len(bad.defects), 1)
        self.assertIsInstance(bad.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    # test_defect_handling
    def test_first_line_is_continuation_header(self):
        eq = self.assertEqual
        m = ' Line 1\nSubject: test\n\nbody'
        msg = email.message_from_string(m)
        eq(msg.keys(), ['Subject'])
        eq(msg.get_payload(), 'body')
        eq(len(msg.defects), 1)
        self.assertDefectsEqual(msg.defects,
                                 [errors.FirstHeaderLineIsContinuationDefect])
        eq(msg.defects[0].line, ' Line 1\n')

    # test_defect_handling
    def test_missing_header_body_separator(self):
        # Our heuristic if we see a line that doesn't look like a header (no
        # leading whitespace but no ':') is to assume that the blank line that
        # separates the header from the body is missing, and to stop parsing
        # headers and start parsing the body.
        msg = self._str_msg('Subject: test\nnot a header\nTo: abc\n\nb\n')
        self.assertEqual(msg.keys(), ['Subject'])
        self.assertEqual(msg.get_payload(), 'not a header\nTo: abc\n\nb\n')
        self.assertDefectsEqual(msg.defects,
                                [errors.MissingHeaderBodySeparatorDefect])
2176
Ezio Melottib3aedd42010-11-20 19:04:17 +00002177
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002178# Test RFC 2047 header encoding and decoding
Guido van Rossum9604e662007-08-30 03:46:43 +00002179class TestRFC2047(TestEmailBase):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002180 def test_rfc2047_multiline(self):
2181 eq = self.assertEqual
2182 s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
2183 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
2184 dh = decode_header(s)
2185 eq(dh, [
R David Murray07ea53c2012-06-02 17:56:49 -04002186 (b'Re: ', None),
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002187 (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
R David Murray07ea53c2012-06-02 17:56:49 -04002188 (b' baz foo bar ', None),
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002189 (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
2190 header = make_header(dh)
2191 eq(str(header),
2192 'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
Barry Warsaw00b34222007-08-31 02:35:00 +00002193 self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
Guido van Rossum9604e662007-08-30 03:46:43 +00002194Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
2195 =?mac-iceland?q?=9Arg=8Cs?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002196
R David Murray07ea53c2012-06-02 17:56:49 -04002197 def test_whitespace_keeper_unicode(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002198 eq = self.assertEqual
2199 s = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
2200 dh = decode_header(s)
2201 eq(dh, [(b'Andr\xe9', 'iso-8859-1'),
R David Murray07ea53c2012-06-02 17:56:49 -04002202 (b' Pirard <pirard@dom.ain>', None)])
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002203 header = str(make_header(dh))
2204 eq(header, 'Andr\xe9 Pirard <pirard@dom.ain>')
2205
R David Murray07ea53c2012-06-02 17:56:49 -04002206 def test_whitespace_keeper_unicode_2(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002207 eq = self.assertEqual
2208 s = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?='
2209 dh = decode_header(s)
R David Murray07ea53c2012-06-02 17:56:49 -04002210 eq(dh, [(b'The ', None), (b'quick brown fox', 'iso-8859-1'),
2211 (b' jumped over the ', None), (b'lazy dog', 'iso-8859-1')])
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002212 hu = str(make_header(dh))
2213 eq(hu, 'The quick brown fox jumped over the lazy dog')
2214
2215 def test_rfc2047_missing_whitespace(self):
2216 s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
2217 dh = decode_header(s)
R David Murray07ea53c2012-06-02 17:56:49 -04002218 self.assertEqual(dh, [(b'Sm', None), (b'\xf6', 'iso-8859-1'),
2219 (b'rg', None), (b'\xe5', 'iso-8859-1'),
2220 (b'sbord', None)])
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002221
2222 def test_rfc2047_with_whitespace(self):
2223 s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
2224 dh = decode_header(s)
R David Murray07ea53c2012-06-02 17:56:49 -04002225 self.assertEqual(dh, [(b'Sm ', None), (b'\xf6', 'iso-8859-1'),
2226 (b' rg ', None), (b'\xe5', 'iso-8859-1'),
2227 (b' sbord', None)])
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002228
R. David Murrayc4e69cc2010-08-03 22:14:10 +00002229 def test_rfc2047_B_bad_padding(self):
2230 s = '=?iso-8859-1?B?%s?='
2231 data = [ # only test complete bytes
2232 ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
2233 ('dmk=', b'vi'), ('dmk', b'vi')
2234 ]
2235 for q, a in data:
2236 dh = decode_header(s % q)
2237 self.assertEqual(dh, [(a, 'iso-8859-1')])
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002238
R. David Murray31e984c2010-10-01 15:40:20 +00002239 def test_rfc2047_Q_invalid_digits(self):
2240 # issue 10004.
2241 s = '=?iso-8659-1?Q?andr=e9=zz?='
2242 self.assertEqual(decode_header(s),
2243 [(b'andr\xe9=zz', 'iso-8659-1')])
2244
R David Murray07ea53c2012-06-02 17:56:49 -04002245 def test_rfc2047_rfc2047_1(self):
2246 # 1st testcase at end of rfc2047
2247 s = '(=?ISO-8859-1?Q?a?=)'
2248 self.assertEqual(decode_header(s),
2249 [(b'(', None), (b'a', 'iso-8859-1'), (b')', None)])
2250
2251 def test_rfc2047_rfc2047_2(self):
2252 # 2nd testcase at end of rfc2047
2253 s = '(=?ISO-8859-1?Q?a?= b)'
2254 self.assertEqual(decode_header(s),
2255 [(b'(', None), (b'a', 'iso-8859-1'), (b' b)', None)])
2256
2257 def test_rfc2047_rfc2047_3(self):
2258 # 3rd testcase at end of rfc2047
2259 s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)'
2260 self.assertEqual(decode_header(s),
2261 [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])
2262
2263 def test_rfc2047_rfc2047_4(self):
2264 # 4th testcase at end of rfc2047
2265 s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)'
2266 self.assertEqual(decode_header(s),
2267 [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])
2268
2269 def test_rfc2047_rfc2047_5a(self):
2270 # 5th testcase at end of rfc2047 newline is \r\n
2271 s = '(=?ISO-8859-1?Q?a?=\r\n =?ISO-8859-1?Q?b?=)'
2272 self.assertEqual(decode_header(s),
2273 [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])
2274
2275 def test_rfc2047_rfc2047_5b(self):
2276 # 5th testcase at end of rfc2047 newline is \n
2277 s = '(=?ISO-8859-1?Q?a?=\n =?ISO-8859-1?Q?b?=)'
2278 self.assertEqual(decode_header(s),
2279 [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])
2280
2281 def test_rfc2047_rfc2047_6(self):
2282 # 6th testcase at end of rfc2047
2283 s = '(=?ISO-8859-1?Q?a_b?=)'
2284 self.assertEqual(decode_header(s),
2285 [(b'(', None), (b'a b', 'iso-8859-1'), (b')', None)])
2286
2287 def test_rfc2047_rfc2047_7(self):
2288 # 7th testcase at end of rfc2047
2289 s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)'
2290 self.assertEqual(decode_header(s),
2291 [(b'(', None), (b'a', 'iso-8859-1'), (b' b', 'iso-8859-2'),
2292 (b')', None)])
2293 self.assertEqual(make_header(decode_header(s)).encode(), s.lower())
2294 self.assertEqual(str(make_header(decode_header(s))), '(a b)')
2295
R David Murray82ffabd2012-06-03 12:27:07 -04002296 def test_multiline_header(self):
2297 s = '=?windows-1252?q?=22M=FCller_T=22?=\r\n <T.Mueller@xxx.com>'
2298 self.assertEqual(decode_header(s),
2299 [(b'"M\xfcller T"', 'windows-1252'),
2300 (b'<T.Mueller@xxx.com>', None)])
2301 self.assertEqual(make_header(decode_header(s)).encode(),
2302 ''.join(s.splitlines()))
2303 self.assertEqual(str(make_header(decode_header(s))),
2304 '"Müller T" <T.Mueller@xxx.com>')
2305
Ezio Melottib3aedd42010-11-20 19:04:17 +00002306
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002307# Test the MIMEMessage class
class TestMIMEMessage(TestEmailBase):
    """Tests for MIMEMessage and for message/* and multipart containers."""

    def setUp(self):
        # Keep the raw text of the message/rfc822 fixture on the instance.
        with openfile('msg_11.txt') as fp:
            self._text = fp.read()

    def _check_digest_default_types(self, filename):
        # Shared body of the two default-type tests: both top-level parts of
        # the fixture must report message/rfc822, and the message nested in
        # each of them must report text/plain, for both the default type and
        # the effective content type.
        with openfile(filename) as fp:
            msg = email.message_from_file(fp)
        containers = (msg.get_payload(0), msg.get_payload(1))
        for container in containers:
            self.assertEqual(container.get_default_type(), 'message/rfc822')
            self.assertEqual(container.get_content_type(), 'message/rfc822')
        for container in containers:
            inner = container.get_payload(0)
            self.assertEqual(inner.get_default_type(), 'text/plain')
            self.assertEqual(inner.get_content_type(), 'text/plain')

    def test_type_error(self):
        # MIMEMessage refuses a plain string as the wrapped message.
        self.assertRaises(TypeError, MIMEMessage, 'a plain string')

    def test_valid_argument(self):
        subject = 'A sub-message'
        inner = Message()
        inner['Subject'] = subject
        wrapper = MIMEMessage(inner)
        self.assertEqual(wrapper.get_content_type(), 'message/rfc822')
        payload = wrapper.get_payload()
        self.assertIsInstance(payload, list)
        self.assertEqual(len(payload), 1)
        subpart = payload[0]
        # The very same object is stored, not a copy.
        self.assertIs(subpart, inner)
        self.assertEqual(subpart['subject'], subject)

    def test_bad_multipart(self):
        first = Message()
        first['Subject'] = 'subpart 1'
        second = Message()
        second['Subject'] = 'subpart 2'
        wrapper = MIMEMessage(first)
        # Attaching a second message to a MIMEMessage must be rejected.
        self.assertRaises(errors.MultipartConversionError,
                          wrapper.attach, second)

    def test_generate(self):
        # First craft the message to be encapsulated
        enclosed = Message()
        enclosed['Subject'] = 'An enclosed message'
        enclosed.set_payload('Here is the body of the message.\n')
        wrapper = MIMEMessage(enclosed)
        wrapper['Subject'] = 'The enclosing message'
        out = StringIO()
        Generator(out).flatten(wrapper)
        self.assertEqual(out.getvalue(), """\
Content-Type: message/rfc822
MIME-Version: 1.0
Subject: The enclosing message

Subject: An enclosed message

Here is the body of the message.
""")

    def test_parse_message_rfc822(self):
        msg = self._msgobj('msg_11.txt')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        self.assertEqual(len(payload), 1)
        submsg = payload[0]
        self.assertIsInstance(submsg, Message)
        self.assertEqual(submsg['subject'], 'An enclosed message')
        self.assertEqual(submsg.get_payload(),
                         'Here is the body of the message.\n')

    def test_dsn(self):
        eq = self.assertEqual
        # msg 16 is a Delivery Status Notification, see RFC 1894
        msg = self._msgobj('msg_16.txt')
        eq(msg.get_content_type(), 'multipart/report')
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 3)
        # Subpart 1 is a text/plain, human readable section
        subpart = msg.get_payload(0)
        eq(subpart.get_content_type(), 'text/plain')
        # NOTE(review): the leading whitespace of the indented lines in this
        # literal must match msg_16.txt exactly -- confirm against the
        # fixture, the indentation width could not be verified here.
        eq(subpart.get_payload(), """\
This report relates to a message you sent with the following header fields:

 Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
 Date: Sun, 23 Sep 2001 20:10:55 -0700
 From: "Ian T. Henry" <henryi@oxy.edu>
 To: SoCal Raves <scr@socal-raves.org>
 Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

 Recipient address: jangel1@cougar.noc.ucla.edu
 Reason: recipient reached disk quota

""")
        # Subpart 2 contains the machine parsable DSN information.  It
        # consists of two blocks of headers, represented by two nested Message
        # objects.
        subpart = msg.get_payload(1)
        eq(subpart.get_content_type(), 'message/delivery-status')
        eq(len(subpart.get_payload()), 2)
        # message/delivery-status should treat each block as a bunch of
        # headers, i.e. a bunch of Message objects.
        dsn1 = subpart.get_payload(0)
        self.assertIsInstance(dsn1, Message)
        eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
        eq(dsn1.get_param('dns', header='reporting-mta'), '')
        # Try a missing one <wink>
        eq(dsn1.get_param('nsd', header='reporting-mta'), None)
        dsn2 = subpart.get_payload(1)
        self.assertIsInstance(dsn2, Message)
        eq(dsn2['action'], 'failed')
        eq(dsn2.get_params(header='original-recipient'),
           [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
        eq(dsn2.get_param('rfc822', header='final-recipient'), '')
        # Subpart 3 is the original message
        subpart = msg.get_payload(2)
        eq(subpart.get_content_type(), 'message/rfc822')
        payload = subpart.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subsubpart = payload[0]
        self.assertIsInstance(subsubpart, Message)
        eq(subsubpart.get_content_type(), 'text/plain')
        eq(subsubpart['message-id'],
           '<002001c144a6$8752e060$56104586@oxy.edu>')

    def test_epilogue(self):
        # Build by hand the message stored in msg_21.txt and check that
        # flattening it reproduces the file, preamble and epilogue included.
        with openfile('msg_21.txt') as fp:
            text = fp.read()
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = 'End of MIME message\n'
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        sfp = StringIO()
        Generator(sfp).flatten(msg)
        self.ndiffAssertEqual(sfp.getvalue(), text)

    def test_no_nl_preamble(self):
        # The preamble has no trailing newline and the epilogue is empty.
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = ''
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        self.ndiffAssertEqual(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
""")

    def test_default_type(self):
        self._check_digest_default_types('msg_30.txt')

    def test_default_type_with_explicit_container_type(self):
        self._check_digest_default_types('msg_28.txt')

    def test_default_type_non_parsed(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # Set up container
        container = MIMEMultipart('digest', 'BOUNDARY')
        container.epilogue = ''
        # Set up subparts
        subpart1a = MIMEText('message 1\n')
        subpart2a = MIMEText('message 2\n')
        subpart1 = MIMEMessage(subpart1a)
        subpart2 = MIMEMessage(subpart2a)
        container.attach(subpart1)
        container.attach(subpart2)
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
        # Without explicit headers the subparts must still report
        # message/rfc822 as both their default and effective type.
        del subpart1['content-type']
        del subpart1['mime-version']
        del subpart2['content-type']
        del subpart2['mime-version']
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')

    def test_mime_attachments_in_constructor(self):
        text1 = MIMEText('')
        text2 = MIMEText('')
        msg = MIMEMultipart(_subparts=(text1, text2))
        self.assertEqual(len(msg.get_payload()), 2)
        self.assertEqual(msg.get_payload(0), text1)
        self.assertEqual(msg.get_payload(1), text2)

    def test_default_multipart_constructor(self):
        msg = MIMEMultipart()
        self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002605
Ezio Melottib3aedd42010-11-20 19:04:17 +00002606
# A general test of parser->model->generator idempotency.  IOW, read a message
# in, parse it into a message object tree, then without touching the tree,
# regenerate the plain text.  The original text and the transformed text
# should be identical.  Note that the Unix-From line is ignored by default,
# since it may contain a changed date.
class TestIdempotent(TestEmailBase):
    """Parse fixture messages and check that flattening them again
    reproduces the original text."""

    linesep = '\n'

    def _msgobj(self, filename):
        # Return both the parsed message and the raw text it was parsed from.
        with openfile(filename) as fp:
            data = fp.read()
        return email.message_from_string(data), data

    def _idempotent(self, msg, text, unixfrom=False):
        # maxheaderlen=0 is passed so that header lines are not re-wrapped.
        out = StringIO()
        Generator(out, maxheaderlen=0).flatten(msg, unixfrom=unixfrom)
        self.ndiffAssertEqual(text, out.getvalue())

    def _check_roundtrip(self, filename, unixfrom=False):
        # Load *filename* and assert that regenerating it gives the same text.
        msg, text = self._msgobj(filename)
        self._idempotent(msg, text, unixfrom=unixfrom)

    def test_parse_text_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_01.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_params()[1], ('charset', 'us-ascii'))
        eq(msg.get_param('charset'), 'us-ascii')
        eq(msg.preamble, None)
        eq(msg.epilogue, None)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_03.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_params(), None)
        eq(msg.get_param('charset'), None)
        self._idempotent(msg, text)

    def test_simple_multipart(self):
        self._check_roundtrip('msg_04.txt')

    def test_MIME_digest(self):
        self._check_roundtrip('msg_02.txt')

    def test_long_header(self):
        self._check_roundtrip('msg_27.txt')

    def test_MIME_digest_with_part_headers(self):
        self._check_roundtrip('msg_28.txt')

    def test_mixed_with_image(self):
        self._check_roundtrip('msg_06.txt')

    def test_multipart_report(self):
        self._check_roundtrip('msg_05.txt')

    def test_dsn(self):
        self._check_roundtrip('msg_16.txt')

    def test_preamble_epilogue(self):
        self._check_roundtrip('msg_21.txt')

    def test_multipart_one_part(self):
        self._check_roundtrip('msg_23.txt')

    def test_multipart_no_parts(self):
        self._check_roundtrip('msg_24.txt')

    def test_no_start_boundary(self):
        self._check_roundtrip('msg_31.txt')

    def test_rfc2231_charset(self):
        self._check_roundtrip('msg_32.txt')

    def test_more_rfc2231_parameters(self):
        self._check_roundtrip('msg_33.txt')

    def test_text_plain_in_a_multipart_digest(self):
        self._check_roundtrip('msg_34.txt')

    def test_nested_multipart_mixeds(self):
        self._check_roundtrip('msg_12a.txt')

    def test_message_external_body_idempotent(self):
        self._check_roundtrip('msg_36.txt')

    def test_message_delivery_status(self):
        # This fixture is regenerated with the Unix-From line included.
        self._check_roundtrip('msg_43.txt', unixfrom=True)

    def test_message_signed_idempotent(self):
        self._check_roundtrip('msg_45.txt')

    def test_content_type(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_05.txt')
        eq(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(msg.get_params())
        eq(params['report-type'], 'delivery-status')
        eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep)
        eq(msg.epilogue, self.linesep)
        eq(len(msg.get_payload()), 3)
        # Make sure the subparts are what we expect
        msg1 = msg.get_payload(0)
        eq(msg1.get_content_type(), 'text/plain')
        eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg2 = msg.get_payload(1)
        eq(msg2.get_content_type(), 'text/plain')
        eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg3 = msg.get_payload(2)
        eq(msg3.get_content_type(), 'message/rfc822')
        self.assertIsInstance(msg3, Message)
        payload = msg3.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg4 = payload[0]
        self.assertIsInstance(msg4, Message)
        eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep)

    def test_parser(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        eq(msg.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and that
        # that submessage has a type of text/plain
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg1 = payload[0]
        self.assertIsInstance(msg1, Message)
        eq(msg1.get_content_type(), 'text/plain')
        self.assertIsInstance(msg1.get_payload(), str)
        eq(msg1.get_payload(), self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002769
2770
Ezio Melottib3aedd42010-11-20 19:04:17 +00002771
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002772# Test various other bits of the package's functionality
2773class TestMiscellaneous(TestEmailBase):
def test_message_from_string(self):
    # Parsing msg_01.txt from a string and flattening it again must
    # reproduce the file text.
    with openfile('msg_01.txt') as fp:
        original = fp.read()
    parsed = email.message_from_string(original)
    out = StringIO()
    # Don't wrap/continue long headers since we're trying to test
    # idempotency.
    Generator(out, maxheaderlen=0).flatten(parsed)
    self.assertEqual(original, out.getvalue())
2784
def test_message_from_file(self):
    # Same round trip as the string variant, but parsing straight from
    # the open file object.
    with openfile('msg_01.txt') as fp:
        original = fp.read()
        # Rewind so the parser sees the file from the start.
        fp.seek(0)
        parsed = email.message_from_file(fp)
    out = StringIO()
    # Don't wrap/continue long headers since we're trying to test
    # idempotency.
    Generator(out, maxheaderlen=0).flatten(parsed)
    self.assertEqual(original, out.getvalue())
2796
def test_message_from_string_with_class(self):
    # message_from_string() must build the whole tree out of the class
    # passed as its second argument.

    # Create a subclass
    class MyMessage(Message):
        pass

    with openfile('msg_01.txt') as fp:
        text = fp.read()
    msg = email.message_from_string(text, MyMessage)
    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)).
    self.assertIsInstance(msg, MyMessage)
    # Try something more complicated
    with openfile('msg_02.txt') as fp:
        text = fp.read()
    msg = email.message_from_string(text, MyMessage)
    for subpart in msg.walk():
        self.assertIsInstance(subpart, MyMessage)
2814
def test_message_from_file_with_class(self):
    # message_from_file() must build the whole tree out of the class
    # passed as its second argument.

    # Create a subclass
    class MyMessage(Message):
        pass

    with openfile('msg_01.txt') as fp:
        msg = email.message_from_file(fp, MyMessage)
    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)).
    self.assertIsInstance(msg, MyMessage)
    # Try something more complicated
    with openfile('msg_02.txt') as fp:
        msg = email.message_from_file(fp, MyMessage)
    for subpart in msg.walk():
        self.assertIsInstance(subpart, MyMessage)
2829
def test_custom_message_does_not_require_arguments(self):
    # A Message subclass whose __init__ takes no extra arguments must be
    # usable as the message class.
    class MyMessage(Message):
        def __init__(self):
            super().__init__()

    msg = self._str_msg("Subject: test\n\ntest", MyMessage)
    # assertIsInstance reports the actual type on failure.
    self.assertIsInstance(msg, MyMessage)
2836
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002837 def test__all__(self):
2838 module = __import__('email')
R David Murray1b6c7242012-03-16 22:43:05 -04002839 self.assertEqual(sorted(module.__all__), [
2840 'base64mime', 'charset', 'encoders', 'errors', 'feedparser',
2841 'generator', 'header', 'iterators', 'message',
2842 'message_from_binary_file', 'message_from_bytes',
2843 'message_from_file', 'message_from_string', 'mime', 'parser',
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002844 'quoprimime', 'utils',
2845 ])
2846
2847 def test_formatdate(self):
2848 now = time.time()
2849 self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
2850 time.gmtime(now)[:6])
2851
2852 def test_formatdate_localtime(self):
2853 now = time.time()
2854 self.assertEqual(
2855 utils.parsedate(utils.formatdate(now, localtime=True))[:6],
2856 time.localtime(now)[:6])
2857
2858 def test_formatdate_usegmt(self):
2859 now = time.time()
2860 self.assertEqual(
2861 utils.formatdate(now, localtime=False),
2862 time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
2863 self.assertEqual(
2864 utils.formatdate(now, localtime=False, usegmt=True),
2865 time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
2866
Georg Brandl1aca31e2012-09-22 09:03:56 +02002867 # parsedate and parsedate_tz will become deprecated interfaces someday
2868 def test_parsedate_returns_None_for_invalid_strings(self):
2869 self.assertIsNone(utils.parsedate(''))
2870 self.assertIsNone(utils.parsedate_tz(''))
2871 self.assertIsNone(utils.parsedate('0'))
2872 self.assertIsNone(utils.parsedate_tz('0'))
2873 self.assertIsNone(utils.parsedate('A Complete Waste of Time'))
2874 self.assertIsNone(utils.parsedate_tz('A Complete Waste of Time'))
2875 # Not a part of the spec but, but this has historically worked:
2876 self.assertIsNone(utils.parsedate(None))
2877 self.assertIsNone(utils.parsedate_tz(None))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002878
2879 def test_parsedate_compact(self):
2880 # The FWS after the comma is optional
2881 self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
2882 utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
2883
2884 def test_parsedate_no_dayofweek(self):
2885 eq = self.assertEqual
2886 eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
2887 (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
2888
2889 def test_parsedate_compact_no_dayofweek(self):
2890 eq = self.assertEqual
2891 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
2892 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2893
R. David Murray4a62e892010-12-23 20:35:46 +00002894 def test_parsedate_no_space_before_positive_offset(self):
2895 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
2896 (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
2897
2898 def test_parsedate_no_space_before_negative_offset(self):
2899 # Issue 1155362: we already handled '+' for this case.
2900 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
2901 (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))
2902
2903
R David Murrayaccd1c02011-03-13 20:06:23 -04002904 def test_parsedate_accepts_time_with_dots(self):
2905 eq = self.assertEqual
2906 eq(utils.parsedate_tz('5 Feb 2003 13.47.26 -0800'),
2907 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2908 eq(utils.parsedate_tz('5 Feb 2003 13.47 -0800'),
2909 (2003, 2, 5, 13, 47, 0, 0, 1, -1, -28800))
2910
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002911 def test_parsedate_acceptable_to_time_functions(self):
2912 eq = self.assertEqual
2913 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
2914 t = int(time.mktime(timetup))
2915 eq(time.localtime(t)[:6], timetup[:6])
2916 eq(int(time.strftime('%Y', timetup)), 2003)
2917 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
2918 t = int(time.mktime(timetup[:9]))
2919 eq(time.localtime(t)[:6], timetup[:6])
2920 eq(int(time.strftime('%Y', timetup[:9])), 2003)
2921
Alexander Belopolskya07548e2012-06-21 20:34:09 -04002922 def test_mktime_tz(self):
2923 self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
2924 -1, -1, -1, 0)), 0)
2925 self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
2926 -1, -1, -1, 1234)), -1234)
2927
R. David Murray219d1c82010-08-25 00:45:55 +00002928 def test_parsedate_y2k(self):
2929 """Test for parsing a date with a two-digit year.
2930
2931 Parsing a date with a two-digit year should return the correct
2932 four-digit year. RFC822 allows two-digit years, but RFC2822 (which
2933 obsoletes RFC822) requires four-digit years.
2934
2935 """
2936 self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
2937 utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
2938 self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
2939 utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
2940
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002941 def test_parseaddr_empty(self):
2942 self.assertEqual(utils.parseaddr('<>'), ('', ''))
2943 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
2944
2945 def test_noquote_dump(self):
2946 self.assertEqual(
2947 utils.formataddr(('A Silly Person', 'person@dom.ain')),
2948 'A Silly Person <person@dom.ain>')
2949
2950 def test_escape_dump(self):
2951 self.assertEqual(
2952 utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
R David Murrayb53319f2012-03-14 15:31:47 -04002953 r'"A (Very) Silly Person" <person@dom.ain>')
2954 self.assertEqual(
2955 utils.parseaddr(r'"A \(Very\) Silly Person" <person@dom.ain>'),
2956 ('A (Very) Silly Person', 'person@dom.ain'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002957 a = r'A \(Special\) Person'
2958 b = 'person@dom.ain'
2959 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2960
2961 def test_escape_backslashes(self):
2962 self.assertEqual(
2963 utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
2964 r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
2965 a = r'Arthur \Backslash\ Foobar'
2966 b = 'person@dom.ain'
2967 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2968
R David Murray8debacb2011-04-06 09:35:57 -04002969 def test_quotes_unicode_names(self):
2970 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2971 name = "H\u00e4ns W\u00fcrst"
2972 addr = 'person@dom.ain'
2973 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
2974 latin1_quopri = "=?iso-8859-1?q?H=E4ns_W=FCrst?= <person@dom.ain>"
2975 self.assertEqual(utils.formataddr((name, addr)), utf8_base64)
2976 self.assertEqual(utils.formataddr((name, addr), 'iso-8859-1'),
2977 latin1_quopri)
2978
2979 def test_accepts_any_charset_like_object(self):
2980 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2981 name = "H\u00e4ns W\u00fcrst"
2982 addr = 'person@dom.ain'
2983 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
2984 foobar = "FOOBAR"
2985 class CharsetMock:
2986 def header_encode(self, string):
2987 return foobar
2988 mock = CharsetMock()
2989 mock_expected = "%s <%s>" % (foobar, addr)
2990 self.assertEqual(utils.formataddr((name, addr), mock), mock_expected)
2991 self.assertEqual(utils.formataddr((name, addr), Charset('utf-8')),
2992 utf8_base64)
2993
2994 def test_invalid_charset_like_object_raises_error(self):
2995 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2996 name = "H\u00e4ns W\u00fcrst"
2997 addr = 'person@dom.ain'
2998 # A object without a header_encode method:
2999 bad_charset = object()
3000 self.assertRaises(AttributeError, utils.formataddr, (name, addr),
3001 bad_charset)
3002
3003 def test_unicode_address_raises_error(self):
3004 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
3005 addr = 'pers\u00f6n@dom.in'
3006 self.assertRaises(UnicodeError, utils.formataddr, (None, addr))
3007 self.assertRaises(UnicodeError, utils.formataddr, ("Name", addr))
3008
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003009 def test_name_with_dot(self):
3010 x = 'John X. Doe <jxd@example.com>'
3011 y = '"John X. Doe" <jxd@example.com>'
3012 a, b = ('John X. Doe', 'jxd@example.com')
3013 self.assertEqual(utils.parseaddr(x), (a, b))
3014 self.assertEqual(utils.parseaddr(y), (a, b))
3015 # formataddr() quotes the name if there's a dot in it
3016 self.assertEqual(utils.formataddr((a, b)), y)
3017
R. David Murray5397e862010-10-02 15:58:26 +00003018 def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
3019 # issue 10005. Note that in the third test the second pair of
3020 # backslashes is not actually a quoted pair because it is not inside a
3021 # comment or quoted string: the address being parsed has a quoted
3022 # string containing a quoted backslash, followed by 'example' and two
3023 # backslashes, followed by another quoted string containing a space and
3024 # the word 'example'. parseaddr copies those two backslashes
3025 # literally. Per rfc5322 this is not technically correct since a \ may
3026 # not appear in an address outside of a quoted string. It is probably
3027 # a sensible Postel interpretation, though.
3028 eq = self.assertEqual
3029 eq(utils.parseaddr('""example" example"@example.com'),
3030 ('', '""example" example"@example.com'))
3031 eq(utils.parseaddr('"\\"example\\" example"@example.com'),
3032 ('', '"\\"example\\" example"@example.com'))
3033 eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
3034 ('', '"\\\\"example\\\\" example"@example.com'))
3035
    def test_parseaddr_preserves_spaces_in_local_part(self):
        # issue 9286.  A normal RFC5322 local part should not contain any
        # folding white space, but legacy local parts can (they are a sequence
        # of atoms, not dotatoms).  On the other hand we strip whitespace from
        # before the @ and around dots, on the assumption that the whitespace
        # around the punctuation is a mistake in what would otherwise be
        # an RFC5322 local part.  Leading whitespace is, as usual, stripped as
        # well.
        #
        # NOTE(review): as rendered here the first two assertions are
        # identical; the inputs presumably differ only in runs of spaces
        # that this view collapses -- confirm against the repository copy
        # before touching the string literals.
        self.assertEqual(('', "merwok wok@xample.com"),
            utils.parseaddr("merwok wok@xample.com"))
        self.assertEqual(('', "merwok wok@xample.com"),
            utils.parseaddr("merwok wok@xample.com"))
        self.assertEqual(('', "merwok wok@xample.com"),
            utils.parseaddr(" merwok wok @xample.com"))
        # A quoted string embedded in the local part is kept verbatim.
        self.assertEqual(('', 'merwok"wok" wok@xample.com'),
            utils.parseaddr('merwok"wok" wok@xample.com'))
        # Whitespace around the dots is removed.
        self.assertEqual(('', 'merwok.wok.wok@xample.com'),
            utils.parseaddr('merwok. wok . wok@xample.com'))
3053
R David Murrayb53319f2012-03-14 15:31:47 -04003054 def test_formataddr_does_not_quote_parens_in_quoted_string(self):
3055 addr = ("'foo@example.com' (foo@example.com)",
3056 'foo@example.com')
3057 addrstr = ('"\'foo@example.com\' '
3058 '(foo@example.com)" <foo@example.com>')
3059 self.assertEqual(utils.parseaddr(addrstr), addr)
3060 self.assertEqual(utils.formataddr(addr), addrstr)
3061
3062
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003063 def test_multiline_from_comment(self):
3064 x = """\
3065Foo
3066\tBar <foo@example.com>"""
3067 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
3068
3069 def test_quote_dump(self):
3070 self.assertEqual(
3071 utils.formataddr(('A Silly; Person', 'person@dom.ain')),
3072 r'"A Silly; Person" <person@dom.ain>')
3073
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003074 def test_charset_richcomparisons(self):
3075 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003076 ne = self.assertNotEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003077 cset1 = Charset()
3078 cset2 = Charset()
3079 eq(cset1, 'us-ascii')
3080 eq(cset1, 'US-ASCII')
3081 eq(cset1, 'Us-AsCiI')
3082 eq('us-ascii', cset1)
3083 eq('US-ASCII', cset1)
3084 eq('Us-AsCiI', cset1)
3085 ne(cset1, 'usascii')
3086 ne(cset1, 'USASCII')
3087 ne(cset1, 'UsAsCiI')
3088 ne('usascii', cset1)
3089 ne('USASCII', cset1)
3090 ne('UsAsCiI', cset1)
3091 eq(cset1, cset2)
3092 eq(cset2, cset1)
3093
3094 def test_getaddresses(self):
3095 eq = self.assertEqual
3096 eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
3097 'Bud Person <bperson@dom.ain>']),
3098 [('Al Person', 'aperson@dom.ain'),
3099 ('Bud Person', 'bperson@dom.ain')])
3100
3101 def test_getaddresses_nasty(self):
3102 eq = self.assertEqual
3103 eq(utils.getaddresses(['foo: ;']), [('', '')])
3104 eq(utils.getaddresses(
3105 ['[]*-- =~$']),
3106 [('', ''), ('', ''), ('', '*--')])
3107 eq(utils.getaddresses(
3108 ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
3109 [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
3110
3111 def test_getaddresses_embedded_comment(self):
3112 """Test proper handling of a nested comment"""
3113 eq = self.assertEqual
3114 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
3115 eq(addrs[0][1], 'foo@bar.com')
3116
3117 def test_utils_quote_unquote(self):
3118 eq = self.assertEqual
3119 msg = Message()
3120 msg.add_header('content-disposition', 'attachment',
3121 filename='foo\\wacky"name')
3122 eq(msg.get_filename(), 'foo\\wacky"name')
3123
3124 def test_get_body_encoding_with_bogus_charset(self):
3125 charset = Charset('not a charset')
3126 self.assertEqual(charset.get_body_encoding(), 'base64')
3127
3128 def test_get_body_encoding_with_uppercase_charset(self):
3129 eq = self.assertEqual
3130 msg = Message()
3131 msg['Content-Type'] = 'text/plain; charset=UTF-8'
3132 eq(msg['content-type'], 'text/plain; charset=UTF-8')
3133 charsets = msg.get_charsets()
3134 eq(len(charsets), 1)
3135 eq(charsets[0], 'utf-8')
3136 charset = Charset(charsets[0])
3137 eq(charset.get_body_encoding(), 'base64')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00003138 msg.set_payload(b'hello world', charset=charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003139 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
3140 eq(msg.get_payload(decode=True), b'hello world')
3141 eq(msg['content-transfer-encoding'], 'base64')
3142 # Try another one
3143 msg = Message()
3144 msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
3145 charsets = msg.get_charsets()
3146 eq(len(charsets), 1)
3147 eq(charsets[0], 'us-ascii')
3148 charset = Charset(charsets[0])
3149 eq(charset.get_body_encoding(), encoders.encode_7or8bit)
3150 msg.set_payload('hello world', charset=charset)
3151 eq(msg.get_payload(), 'hello world')
3152 eq(msg['content-transfer-encoding'], '7bit')
3153
3154 def test_charsets_case_insensitive(self):
3155 lc = Charset('us-ascii')
3156 uc = Charset('US-ASCII')
3157 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
3158
3159 def test_partial_falls_inside_message_delivery_status(self):
3160 eq = self.ndiffAssertEqual
3161 # The Parser interface provides chunks of data to FeedParser in 8192
3162 # byte gulps. SF bug #1076485 found one of those chunks inside
3163 # message/delivery-status header block, which triggered an
3164 # unreadline() of NeedMoreData.
3165 msg = self._msgobj('msg_43.txt')
3166 sfp = StringIO()
3167 iterators._structure(msg, sfp)
3168 eq(sfp.getvalue(), """\
3169multipart/report
3170 text/plain
3171 message/delivery-status
3172 text/plain
3173 text/plain
3174 text/plain
3175 text/plain
3176 text/plain
3177 text/plain
3178 text/plain
3179 text/plain
3180 text/plain
3181 text/plain
3182 text/plain
3183 text/plain
3184 text/plain
3185 text/plain
3186 text/plain
3187 text/plain
3188 text/plain
3189 text/plain
3190 text/plain
3191 text/plain
3192 text/plain
3193 text/plain
3194 text/plain
3195 text/plain
3196 text/plain
3197 text/plain
3198 text/rfc822-headers
3199""")
3200
R. David Murraya0b44b52010-12-02 21:47:19 +00003201 def test_make_msgid_domain(self):
3202 self.assertEqual(
3203 email.utils.make_msgid(domain='testdomain-string')[-19:],
3204 '@testdomain-string>')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003205
R David Murraye67c6c52013-03-07 16:38:03 -05003206 def test_Generator_linend(self):
3207 # Issue 14645.
3208 with openfile('msg_26.txt', newline='\n') as f:
3209 msgtxt = f.read()
3210 msgtxt_nl = msgtxt.replace('\r\n', '\n')
3211 msg = email.message_from_string(msgtxt)
3212 s = StringIO()
3213 g = email.generator.Generator(s)
3214 g.flatten(msg)
3215 self.assertEqual(s.getvalue(), msgtxt_nl)
3216
3217 def test_BytesGenerator_linend(self):
3218 # Issue 14645.
3219 with openfile('msg_26.txt', newline='\n') as f:
3220 msgtxt = f.read()
3221 msgtxt_nl = msgtxt.replace('\r\n', '\n')
3222 msg = email.message_from_string(msgtxt_nl)
3223 s = BytesIO()
3224 g = email.generator.BytesGenerator(s)
3225 g.flatten(msg, linesep='\r\n')
3226 self.assertEqual(s.getvalue().decode('ascii'), msgtxt)
3227
3228 def test_BytesGenerator_linend_with_non_ascii(self):
3229 # Issue 14645.
3230 with openfile('msg_26.txt', 'rb') as f:
3231 msgtxt = f.read()
3232 msgtxt = msgtxt.replace(b'with attachment', b'fo\xf6')
3233 msgtxt_nl = msgtxt.replace(b'\r\n', b'\n')
3234 msg = email.message_from_bytes(msgtxt_nl)
3235 s = BytesIO()
3236 g = email.generator.BytesGenerator(s)
3237 g.flatten(msg, linesep='\r\n')
3238 self.assertEqual(s.getvalue(), msgtxt)
3239
Ezio Melottib3aedd42010-11-20 19:04:17 +00003240
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003241# Test the iterator/generators
class TestIterators(TestEmailBase):
    # Tests for the helpers in email.iterators, plus one regression test for
    # the feed parser's line buffer.

    def test_body_line_iterator(self):
        # body_line_iterator() must yield exactly the payload lines.
        # First a simple non-multipart message
        simple = self._msgobj('msg_01.txt')
        simple_lines = list(iterators.body_line_iterator(simple))
        self.assertEqual(len(simple_lines), 6)
        self.ndiffAssertEqual(EMPTYSTRING.join(simple_lines),
                              simple.get_payload())
        # Now a more complicated multipart
        multipart = self._msgobj('msg_02.txt')
        multipart_lines = list(iterators.body_line_iterator(multipart))
        self.assertEqual(len(multipart_lines), 43)
        # msg_19.txt holds the expected concatenation of those lines.
        with openfile('msg_19.txt') as fp:
            self.ndiffAssertEqual(EMPTYSTRING.join(multipart_lines),
                                  fp.read())

    def test_typed_subpart_iterator(self):
        # Selecting by main type only: msg_04.txt has two 'text' subparts.
        msg = self._msgobj('msg_04.txt')
        payloads = [part.get_payload()
                    for part in iterators.typed_subpart_iterator(msg, 'text')]
        self.assertEqual(len(payloads), 2)
        self.assertEqual(EMPTYSTRING.join(payloads),
                         'a simple kind of mirror\n'
                         'to reflect upon our own\n'
                         'a simple kind of mirror\n'
                         'to reflect upon our own\n')

    def test_typed_subpart_iterator_default_type(self):
        # Selecting by main type and subtype: msg_03.txt yields one
        # text/plain part.
        msg = self._msgobj('msg_03.txt')
        payloads = [
            part.get_payload()
            for part in iterators.typed_subpart_iterator(msg, 'text', 'plain')
        ]
        self.assertEqual(len(payloads), 1)
        self.assertEqual(EMPTYSTRING.join(payloads),
                         '\n'
                         'Hi,\n'
                         '\n'
                         'Do you like this message?\n'
                         '\n'
                         '-Me\n')

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
           line endings.  A CR ending one push() followed by a LF starting
           the next push() added an empty line.
        '''
        # Each entry: (text pushed, number of complete lines that must become
        # readable right after that push).
        pushes = [
            ("a\r \n", 2),
            ("b", 0),
            ("c\n", 1),
            ("", 0),
            ("d\r\n", 1),
            ("e\r", 0),
            ("\nf", 1),
            ("\r\n", 1),
        ]
        from email.feedparser import BufferedSubFile, NeedMoreData
        bsf = BufferedSubFile()
        collected = []
        expected_total = 0
        for text, expected_lines in pushes:
            bsf.push(text)
            expected_total += expected_lines
            produced = 0
            while True:
                line = bsf.readline()
                # NeedMoreData is a sentinel object, so test identity.
                if line is NeedMoreData:
                    break
                collected.append(line)
                produced += 1
            # assertEqual (not assertTrue(a == b)) so a failure shows both
            # counts.
            self.assertEqual(expected_lines, produced)
        self.assertEqual(len(collected), expected_total)
        # Nothing may be lost or invented: the lines read back concatenate to
        # exactly what was pushed.
        self.assertEqual(''.join(text for text, _ in pushes),
                         ''.join(collected))
3328
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003329
Ezio Melottib3aedd42010-11-20 19:04:17 +00003330
class TestParsers(TestEmailBase):
    # Tests for the string/bytes parsers and the header-only parsers.

    # Never truncate assertion diffs for the long message texts compared
    # in this class.
    maxDiff = None

    def test_header_parser(self):
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt') as fp:
            msg = HeaderParser().parse(fp)
        self.assertEqual(msg['from'], 'ppp-request@zzz.org')
        self.assertEqual(msg['to'], 'ppp@zzz.org')
        self.assertEqual(msg.get_content_type(), 'multipart/mixed')
        # The body is left unparsed: one string, not a list of parts.
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)

    def test_bytes_header_parser(self):
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt', 'rb') as fp:
            msg = email.parser.BytesHeaderParser().parse(fp)
        self.assertEqual(msg['from'], 'ppp-request@zzz.org')
        self.assertEqual(msg['to'], 'ppp@zzz.org')
        self.assertEqual(msg.get_content_type(), 'multipart/mixed')
        self.assertFalse(msg.is_multipart())
        # Text by default, bytes when decoding is requested.
        self.assertIsInstance(msg.get_payload(), str)
        self.assertIsInstance(msg.get_payload(decode=True), bytes)

    def test_whitespace_continuation(self):
        # This message contains a line after the Subject: header that has only
        # whitespace, but it is not empty!
        msg = email.message_from_string(
            "From: aperson@dom.ain\n"
            "To: bperson@dom.ain\n"
            "Subject: the next line has a space on it\n"
            "\x20\n"
            "Date: Mon, 8 Apr 2002 15:09:19 -0400\n"
            "Message-ID: spam\n"
            "\n"
            "Here's the message body\n")
        # The whitespace-only line is folded into the Subject value.
        self.assertEqual(msg['subject'],
                         'the next line has a space on it\n ')
        self.assertEqual(msg['message-id'], 'spam')
        self.assertEqual(msg.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        # Like the previous test, but the subject line is the last
        # header.
        msg = email.message_from_string(
            "From: aperson@dom.ain\n"
            "To: bperson@dom.ain\n"
            "Date: Mon, 8 Apr 2002 15:09:19 -0400\n"
            "Message-ID: spam\n"
            "Subject: the next line has a space on it\n"
            "\x20\n"
            "\n"
            "Here's the message body\n")
        self.assertEqual(msg['subject'],
                         'the next line has a space on it\n ')
        self.assertEqual(msg['message-id'], 'spam')
        self.assertEqual(msg.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        # newline='\n' keeps the CRLF line endings of the file intact.
        with openfile('msg_26.txt', newline='\n') as fp:
            msg = Parser().parse(fp)
        self.assertEqual(len(msg.get_payload()), 2)
        text_part = msg.get_payload(0)
        self.assertEqual(text_part.get_content_type(), 'text/plain')
        self.assertEqual(text_part.get_payload(),
                         'Simple email with attachment.\r\n\r\n')
        attachment = msg.get_payload(1)
        self.assertEqual(attachment.get_content_type(), 'application/riscos')

    def test_crlf_flatten(self):
        # Using newline='\n' preserves the crlfs in this input file.
        with openfile('msg_26.txt', newline='\n') as fp:
            text = fp.read()
        msg = email.message_from_string(text)
        sink = StringIO()
        Generator(sink).flatten(msg, linesep='\r\n')
        self.assertEqual(sink.getvalue(), text)

    def test_multipart_digest_with_extra_mime_headers(self):
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        self.assertTrue(msg.is_multipart())
        self.assertEqual(len(msg.get_payload()), 2)
        # Both digest entries have the same shape and differ only in body.
        for index, body in enumerate(('message 1\n', 'message 2\n')):
            wrapper = msg.get_payload(index)
            self.assertEqual(wrapper.get_content_type(), 'message/rfc822')
            self.assertTrue(wrapper.is_multipart())
            self.assertEqual(len(wrapper.get_payload()), 1)
            inner = wrapper.get_payload(0)
            self.assertFalse(inner.is_multipart())
            self.assertEqual(inner.get_content_type(), 'text/plain')
            self.ndiffAssertEqual(inner.get_payload(), body)

    def test_three_lines(self):
        # A bug report by Andrew McNamara
        # Headers only, no trailing newline; the From value is missing its
        # closing '>' as well.
        lines = ['From: Andrew Person <aperson@dom.ain',
                 'Subject: Test',
                 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
        msg = email.message_from_string(NL.join(lines))
        self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        value1 = 'text'
        value2 = 'more text'
        text = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            value1, value2)
        msg = email.message_from_string(text)
        self.assertEqual(msg.get('Header'), value1)
        self.assertEqual(msg.get('Next-Header'), value2)

    def test_rfc2822_header_syntax(self):
        # Unusual punctuation in a field name is accepted as long as the
        # line has the 'name: value' shape.
        text = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(text)
        self.assertEqual(len(msg), 3)
        self.assertEqual(sorted(msg), ['!"#QUX;~', '>From', 'From'])
        self.assertEqual(msg.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        # The first line has a space before any colon, so it is not a header
        # line and no headers are collected at all.
        text = ('>From foo@example.com 11:25:53\n'
                'From: bar\n!"#QUX;~: zoo\n\nbody')
        msg = email.message_from_string(text)
        self.assertEqual(len(msg.keys()), 0)

    def test_rfc2822_one_character_header(self):
        text = 'A: first header\nB: second header\nCC: third header\n\nbody'
        msg = email.message_from_string(text)
        # sorted() works for any iterable of keys; in-place .sort() would
        # require keys() to return a list.
        self.assertEqual(sorted(msg.keys()), ['A', 'B', 'CC'])
        self.assertEqual(msg.get_payload(), 'body')

    def test_CRLFLF_at_end_of_part(self):
        # issue 5610: feedparser should not eat two chars from body part ending
        # with "\r\n\n".
        text = (
            "From: foo@bar.com\n"
            "To: baz\n"
            "Mime-Version: 1.0\n"
            "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
            "\n"
            "--BOUNDARY\n"
            "Content-Type: text/plain\n"
            "\n"
            "body ending with CRLF newline\r\n"
            "\n"
            "--BOUNDARY--\n"
        )
        msg = email.message_from_string(text)
        part_body = msg.get_payload(0).get_payload()
        self.assertTrue(part_body.endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003506
Ezio Melottib3aedd42010-11-20 19:04:17 +00003507
R. David Murray96fd54e2010-10-08 15:55:28 +00003508class Test8BitBytesHandling(unittest.TestCase):
3509 # In Python3 all input is string, but that doesn't work if the actual input
3510 # uses an 8bit transfer encoding. To hack around that, in email 5.1 we
3511 # decode byte streams using the surrogateescape error handler, and
3512 # reconvert to binary at appropriate places if we detect surrogates. This
3513 # doesn't allow us to transform headers with 8bit bytes (they get munged),
3514 # but it does allow us to parse and preserve them, and to decode body
3515 # parts that use an 8bit CTE.
3516
3517 bodytest_msg = textwrap.dedent("""\
3518 From: foo@bar.com
3519 To: baz
3520 Mime-Version: 1.0
3521 Content-Type: text/plain; charset={charset}
3522 Content-Transfer-Encoding: {cte}
3523
3524 {bodyline}
3525 """)
3526
3527 def test_known_8bit_CTE(self):
3528 m = self.bodytest_msg.format(charset='utf-8',
3529 cte='8bit',
3530 bodyline='pöstal').encode('utf-8')
3531 msg = email.message_from_bytes(m)
3532 self.assertEqual(msg.get_payload(), "pöstal\n")
3533 self.assertEqual(msg.get_payload(decode=True),
3534 "pöstal\n".encode('utf-8'))
3535
3536 def test_unknown_8bit_CTE(self):
3537 m = self.bodytest_msg.format(charset='notavalidcharset',
3538 cte='8bit',
3539 bodyline='pöstal').encode('utf-8')
3540 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00003541 self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n")
R. David Murray96fd54e2010-10-08 15:55:28 +00003542 self.assertEqual(msg.get_payload(decode=True),
3543 "pöstal\n".encode('utf-8'))
3544
3545 def test_8bit_in_quopri_body(self):
3546 # This is non-RFC compliant data...without 'decode' the library code
3547 # decodes the body using the charset from the headers, and because the
3548 # source byte really is utf-8 this works. This is likely to fail
3549 # against real dirty data (ie: produce mojibake), but the data is
3550 # invalid anyway so it is as good a guess as any. But this means that
3551 # this test just confirms the current behavior; that behavior is not
3552 # necessarily the best possible behavior. With 'decode' it is
3553 # returning the raw bytes, so that test should be of correct behavior,
3554 # or at least produce the same result that email4 did.
3555 m = self.bodytest_msg.format(charset='utf-8',
3556 cte='quoted-printable',
3557 bodyline='p=C3=B6stál').encode('utf-8')
3558 msg = email.message_from_bytes(m)
3559 self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
3560 self.assertEqual(msg.get_payload(decode=True),
3561 'pöstál\n'.encode('utf-8'))
3562
3563 def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
3564 # This is similar to the previous test, but proves that if the 8bit
3565 # byte is undecodeable in the specified charset, it gets replaced
3566 # by the unicode 'unknown' character. Again, this may or may not
3567 # be the ideal behavior. Note that if decode=False none of the
3568 # decoders will get involved, so this is the only test we need
3569 # for this behavior.
3570 m = self.bodytest_msg.format(charset='ascii',
3571 cte='quoted-printable',
3572 bodyline='p=C3=B6stál').encode('utf-8')
3573 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00003574 self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
R. David Murray96fd54e2010-10-08 15:55:28 +00003575 self.assertEqual(msg.get_payload(decode=True),
3576 'pöstál\n'.encode('utf-8'))
3577
R David Murray80e0aee2012-05-27 21:23:34 -04003578 # test_defect_handling:test_invalid_chars_in_base64_payload
R. David Murray96fd54e2010-10-08 15:55:28 +00003579 def test_8bit_in_base64_body(self):
R David Murray80e0aee2012-05-27 21:23:34 -04003580 # If we get 8bit bytes in a base64 body, we can just ignore them
3581 # as being outside the base64 alphabet and decode anyway. But
3582 # we register a defect.
R. David Murray96fd54e2010-10-08 15:55:28 +00003583 m = self.bodytest_msg.format(charset='utf-8',
3584 cte='base64',
3585 bodyline='cMO2c3RhbAá=').encode('utf-8')
3586 msg = email.message_from_bytes(m)
3587 self.assertEqual(msg.get_payload(decode=True),
R David Murray80e0aee2012-05-27 21:23:34 -04003588 'pöstal'.encode('utf-8'))
3589 self.assertIsInstance(msg.defects[0],
3590 errors.InvalidBase64CharactersDefect)
R. David Murray96fd54e2010-10-08 15:55:28 +00003591
3592 def test_8bit_in_uuencode_body(self):
3593 # Sticking an 8bit byte in a uuencode block makes it undecodable by
3594 # normal means, so the block is returned undecoded, but as bytes.
3595 m = self.bodytest_msg.format(charset='utf-8',
3596 cte='uuencode',
3597 bodyline='<,.V<W1A; á ').encode('utf-8')
3598 msg = email.message_from_bytes(m)
3599 self.assertEqual(msg.get_payload(decode=True),
3600 '<,.V<W1A; á \n'.encode('utf-8'))
3601
3602
R. David Murray92532142011-01-07 23:25:30 +00003603 headertest_headers = (
3604 ('From: foo@bar.com', ('From', 'foo@bar.com')),
3605 ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
3606 ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
3607 '\tJean de Baddie',
3608 ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
3609 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
3610 ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
3611 ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
3612 )
3613 headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) +
3614 '\nYes, they are flying.\n').encode('utf-8')
R. David Murray96fd54e2010-10-08 15:55:28 +00003615
3616 def test_get_8bit_header(self):
3617 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003618 self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz')
3619 self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz')
R. David Murray96fd54e2010-10-08 15:55:28 +00003620
3621 def test_print_8bit_headers(self):
3622 msg = email.message_from_bytes(self.headertest_msg)
3623 self.assertEqual(str(msg),
R. David Murray92532142011-01-07 23:25:30 +00003624 textwrap.dedent("""\
3625 From: {}
3626 To: {}
3627 Subject: {}
3628 From: {}
3629
3630 Yes, they are flying.
3631 """).format(*[expected[1] for (_, expected) in
3632 self.headertest_headers]))
R. David Murray96fd54e2010-10-08 15:55:28 +00003633
3634 def test_values_with_8bit_headers(self):
3635 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003636 self.assertListEqual([str(x) for x in msg.values()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003637 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003638 'b\uFFFD\uFFFDz',
3639 'Maintenant je vous pr\uFFFD\uFFFDsente mon '
3640 'coll\uFFFD\uFFFDgue, le pouf '
3641 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
R. David Murray96fd54e2010-10-08 15:55:28 +00003642 '\tJean de Baddie',
R. David Murray92532142011-01-07 23:25:30 +00003643 "g\uFFFD\uFFFDst"])
R. David Murray96fd54e2010-10-08 15:55:28 +00003644
3645 def test_items_with_8bit_headers(self):
3646 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003647 self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003648 [('From', 'foo@bar.com'),
R. David Murray92532142011-01-07 23:25:30 +00003649 ('To', 'b\uFFFD\uFFFDz'),
3650 ('Subject', 'Maintenant je vous '
3651 'pr\uFFFD\uFFFDsente '
3652 'mon coll\uFFFD\uFFFDgue, le pouf '
3653 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
3654 '\tJean de Baddie'),
3655 ('From', 'g\uFFFD\uFFFDst')])
R. David Murray96fd54e2010-10-08 15:55:28 +00003656
3657 def test_get_all_with_8bit_headers(self):
3658 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003659 self.assertListEqual([str(x) for x in msg.get_all('from')],
R. David Murray96fd54e2010-10-08 15:55:28 +00003660 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003661 'g\uFFFD\uFFFDst'])
R. David Murray96fd54e2010-10-08 15:55:28 +00003662
R David Murraya2150232011-03-16 21:11:23 -04003663 def test_get_content_type_with_8bit(self):
3664 msg = email.message_from_bytes(textwrap.dedent("""\
3665 Content-Type: text/pl\xA7in; charset=utf-8
3666 """).encode('latin-1'))
3667 self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin")
3668 self.assertEqual(msg.get_content_maintype(), "text")
3669 self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin")
3670
R David Murray97f43c02012-06-24 05:03:27 -04003671 # test_headerregistry.TestContentTypeHeader.non_ascii_in_params
R David Murraya2150232011-03-16 21:11:23 -04003672 def test_get_params_with_8bit(self):
3673 msg = email.message_from_bytes(
3674 'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1'))
3675 self.assertEqual(msg.get_params(header='x-header'),
3676 [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])
3677 self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne')
3678 # XXX: someday you might be able to get 'b\xa7r', for now you can't.
3679 self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None)
3680
R David Murray97f43c02012-06-24 05:03:27 -04003681 # test_headerregistry.TestContentTypeHeader.non_ascii_in_rfc2231_value
R David Murraya2150232011-03-16 21:11:23 -04003682 def test_get_rfc2231_params_with_8bit(self):
3683 msg = email.message_from_bytes(textwrap.dedent("""\
3684 Content-Type: text/plain; charset=us-ascii;
3685 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3686 ).encode('latin-1'))
3687 self.assertEqual(msg.get_param('title'),
3688 ('us-ascii', 'en', 'This is not f\uFFFDn'))
3689
3690 def test_set_rfc2231_params_with_8bit(self):
3691 msg = email.message_from_bytes(textwrap.dedent("""\
3692 Content-Type: text/plain; charset=us-ascii;
3693 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3694 ).encode('latin-1'))
3695 msg.set_param('title', 'test')
3696 self.assertEqual(msg.get_param('title'), 'test')
3697
3698 def test_del_rfc2231_params_with_8bit(self):
3699 msg = email.message_from_bytes(textwrap.dedent("""\
3700 Content-Type: text/plain; charset=us-ascii;
3701 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3702 ).encode('latin-1'))
3703 msg.del_param('title')
3704 self.assertEqual(msg.get_param('title'), None)
3705 self.assertEqual(msg.get_content_maintype(), 'text')
3706
3707 def test_get_payload_with_8bit_cte_header(self):
3708 msg = email.message_from_bytes(textwrap.dedent("""\
3709 Content-Transfer-Encoding: b\xa7se64
3710 Content-Type: text/plain; charset=latin-1
3711
3712 payload
3713 """).encode('latin-1'))
3714 self.assertEqual(msg.get_payload(), 'payload\n')
3715 self.assertEqual(msg.get_payload(decode=True), b'payload\n')
3716
R. David Murray96fd54e2010-10-08 15:55:28 +00003717 non_latin_bin_msg = textwrap.dedent("""\
3718 From: foo@bar.com
3719 To: báz
3720 Subject: Maintenant je vous présente mon collègue, le pouf célèbre
3721 \tJean de Baddie
3722 Mime-Version: 1.0
3723 Content-Type: text/plain; charset="utf-8"
3724 Content-Transfer-Encoding: 8bit
3725
3726 Да, они летят.
3727 """).encode('utf-8')
3728
3729 def test_bytes_generator(self):
3730 msg = email.message_from_bytes(self.non_latin_bin_msg)
3731 out = BytesIO()
3732 email.generator.BytesGenerator(out).flatten(msg)
3733 self.assertEqual(out.getvalue(), self.non_latin_bin_msg)
3734
R. David Murray7372a072011-01-26 21:21:32 +00003735 def test_bytes_generator_handles_None_body(self):
3736 #Issue 11019
3737 msg = email.message.Message()
3738 out = BytesIO()
3739 email.generator.BytesGenerator(out).flatten(msg)
3740 self.assertEqual(out.getvalue(), b"\n")
3741
R. David Murray92532142011-01-07 23:25:30 +00003742 non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\
R. David Murray96fd54e2010-10-08 15:55:28 +00003743 From: foo@bar.com
R. David Murray92532142011-01-07 23:25:30 +00003744 To: =?unknown-8bit?q?b=C3=A1z?=
3745 Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=
3746 =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=
3747 =?unknown-8bit?q?_Jean_de_Baddie?=
R. David Murray96fd54e2010-10-08 15:55:28 +00003748 Mime-Version: 1.0
3749 Content-Type: text/plain; charset="utf-8"
3750 Content-Transfer-Encoding: base64
3751
3752 0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
3753 """)
3754
3755 def test_generator_handles_8bit(self):
3756 msg = email.message_from_bytes(self.non_latin_bin_msg)
3757 out = StringIO()
3758 email.generator.Generator(out).flatten(msg)
R. David Murray92532142011-01-07 23:25:30 +00003759 self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped)
R. David Murray96fd54e2010-10-08 15:55:28 +00003760
3761 def test_bytes_generator_with_unix_from(self):
3762 # The unixfrom contains a current date, so we can't check it
3763 # literally. Just make sure the first word is 'From' and the
3764 # rest of the message matches the input.
3765 msg = email.message_from_bytes(self.non_latin_bin_msg)
3766 out = BytesIO()
3767 email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
3768 lines = out.getvalue().split(b'\n')
3769 self.assertEqual(lines[0].split()[0], b'From')
3770 self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)
3771
R. David Murray92532142011-01-07 23:25:30 +00003772 non_latin_bin_msg_as7bit = non_latin_bin_msg_as7bit_wrapped.split('\n')
3773 non_latin_bin_msg_as7bit[2:4] = [
3774 'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
3775 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
3776 non_latin_bin_msg_as7bit = '\n'.join(non_latin_bin_msg_as7bit)
3777
def test_message_from_binary_file(self):
    # Write the raw message to a scratch file, parse it back from a file
    # object opened in binary mode, and check the text rendering.  The
    # cleanup is registered before the file is created so the file is
    # removed even if a later step fails.
    path = 'test.msg'
    self.addCleanup(unlink, path)
    with open(path, 'wb') as sink:
        sink.write(self.non_latin_bin_msg)
    with open(path, 'rb') as source:
        parsed = email.parser.BytesParser().parse(source)
    self.assertEqual(str(parsed), self.non_latin_bin_msg_as7bit)
3786
# A message whose body holds raw latin-1 bytes and is declared as
# Content-Transfer-Encoding: 8bit.
latin_bin_msg = textwrap.dedent("""\
    From: foo@bar.com
    To: Dinsdale
    Subject: Nudge nudge, wink, wink
    Mime-Version: 1.0
    Content-Type: text/plain; charset="latin-1"
    Content-Transfer-Encoding: 8bit

    oh là là, know what I mean, know what I mean?
    """).encode('latin-1')

# The text rendering the tests below expect for latin_bin_msg: the charset
# is spelled iso-8859-1 and the body is quoted-printable encoded.
latin_bin_msg_as7bit = textwrap.dedent("""\
    From: foo@bar.com
    To: Dinsdale
    Subject: Nudge nudge, wink, wink
    Mime-Version: 1.0
    Content-Type: text/plain; charset="iso-8859-1"
    Content-Transfer-Encoding: quoted-printable

    oh l=E0 l=E0, know what I mean, know what I mean?
    """)
3808
def test_string_generator_reencodes_to_quopri_when_appropriate(self):
    # str() of a bytes-parsed latin-1 8bit message must come out
    # quoted-printable encoded.
    parsed = email.message_from_bytes(self.latin_bin_msg)
    rendered = str(parsed)
    self.assertEqual(rendered, self.latin_bin_msg_as7bit)
3812
def test_decoded_generator_emits_unicode_body(self):
    parsed = email.message_from_bytes(self.latin_bin_msg)
    sink = StringIO()
    email.generator.DecodedGenerator(sink).flatten(parsed)
    # DecodedGenerator output contains an extra blank line compared to
    # the input message.  RDM: not sure if this is a bug or not, but it
    # is not specific to the 8bit->7bit conversion.
    expected = self.latin_bin_msg.decode('latin-1') + '\n'
    self.assertEqual(sink.getvalue(), expected)
3822
def test_bytes_feedparser(self):
    # Push the message into the incremental parser in 10-byte slices and
    # check that the assembled message renders as expected.
    raw = self.latin_bin_msg
    parser = email.feedparser.BytesFeedParser()
    for start in range(0, len(raw), 10):
        parser.feed(raw[start:start+10])
    parsed = parser.close()
    self.assertEqual(str(parsed), self.latin_bin_msg_as7bit)
3829
def test_crlf_flatten(self):
    # Parsing msg_26.txt and flattening it with linesep='\r\n' must
    # reproduce the file's bytes exactly.
    with openfile('msg_26.txt', 'rb') as fp:
        original = fp.read()
    parsed = email.message_from_bytes(original)
    sink = BytesIO()
    email.generator.BytesGenerator(sink).flatten(parsed, linesep='\r\n')
    self.assertEqual(sink.getvalue(), original)
R David Murrayc5c14722011-04-06 08:13:02 -04003838
3839 def test_8bit_multipart(self):
3840 # Issue 11605
3841 source = textwrap.dedent("""\
3842 Date: Fri, 18 Mar 2011 17:15:43 +0100
3843 To: foo@example.com
3844 From: foodwatch-Newsletter <bar@example.com>
3845 Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System
3846 Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain>
3847 MIME-Version: 1.0
3848 Content-Type: multipart/alternative;
3849 boundary="b1_76a486bee62b0d200f33dc2ca08220ad"
3850
3851 --b1_76a486bee62b0d200f33dc2ca08220ad
3852 Content-Type: text/plain; charset="utf-8"
3853 Content-Transfer-Encoding: 8bit
3854
3855 Guten Tag, ,
3856
3857 mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die
3858 Nachrichten aus Japan.
3859
3860
3861 --b1_76a486bee62b0d200f33dc2ca08220ad
3862 Content-Type: text/html; charset="utf-8"
3863 Content-Transfer-Encoding: 8bit
3864
3865 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
3866 "http://www.w3.org/TR/html4/loose.dtd">
3867 <html lang="de">
3868 <head>
3869 <title>foodwatch - Newsletter</title>
3870 </head>
3871 <body>
3872 <p>mit gro&szlig;er Betroffenheit verfolgen auch wir im foodwatch-Team
3873 die Nachrichten aus Japan.</p>
3874 </body>
3875 </html>
3876 --b1_76a486bee62b0d200f33dc2ca08220ad--
3877
3878 """).encode('utf-8')
3879 msg = email.message_from_bytes(source)
3880 s = BytesIO()
3881 g = email.generator.BytesGenerator(s)
3882 g.flatten(msg)
3883 self.assertEqual(s.getvalue(), source)
3884
R David Murray9fd170e2012-03-14 14:05:03 -04003885 def test_bytes_generator_b_encoding_linesep(self):
3886 # Issue 14062: b encoding was tacking on an extra \n.
3887 m = Message()
3888 # This has enough non-ascii that it should always end up b encoded.
3889 m['Subject'] = Header('žluťoučký kůň')
3890 s = BytesIO()
3891 g = email.generator.BytesGenerator(s)
3892 g.flatten(m, linesep='\r\n')
3893 self.assertEqual(
3894 s.getvalue(),
3895 b'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
3896
3897 def test_generator_b_encoding_linesep(self):
3898 # Since this broke in ByteGenerator, test Generator for completeness.
3899 m = Message()
3900 # This has enough non-ascii that it should always end up b encoded.
3901 m['Subject'] = Header('žluťoučký kůň')
3902 s = StringIO()
3903 g = email.generator.Generator(s)
3904 g.flatten(m, linesep='\r\n')
3905 self.assertEqual(
3906 s.getvalue(),
3907 'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
3908
# unittest setting: None removes the length limit on failure diffs, so
# mismatches between long messages are shown in full.
maxDiff = None
3910
Ezio Melottib3aedd42010-11-20 19:04:17 +00003911
class BaseTestBytesGeneratorIdempotent:
    # Mixin supplying the bytes flavour of _msgobj() and _idempotent().
    # Concrete subclasses provide linesep, blinesep and
    # normalize_linesep_regex.

    # Show failure diffs in full, however long the messages are.
    maxDiff = None

    def _msgobj(self, filename):
        # Load the sample file as bytes, rewrite its line endings to
        # self.blinesep, and return (parsed message, normalized bytes).
        with openfile(filename, 'rb') as fp:
            raw = fp.read()
        normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(normalized), normalized

    def _idempotent(self, msg, data, unixfrom=False):
        # Flatten msg with maxheaderlen=0 and this class's line separator;
        # the output must be byte-for-byte equal to data.
        sink = BytesIO()
        bytes_generator = email.generator.BytesGenerator(sink,
                                                         maxheaderlen=0)
        bytes_generator.flatten(msg, unixfrom=unixfrom,
                                linesep=self.linesep)
        self.assertEqual(data, sink.getvalue())
R. David Murray96fd54e2010-10-08 15:55:28 +00003928
3929
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                     TestIdempotent):
    # Runs the inherited tests with '\n' line endings: sample data has
    # every '\r\n' rewritten to '\n' before it is parsed.
    linesep = '\n'
    blinesep = b'\n'
    normalize_linesep_regex = re.compile(br'\r\n')
3935
3936
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    # Runs the inherited tests with '\r\n' line endings.  The negative
    # lookbehind makes the regex match only a '\n' that is not already
    # preceded by '\r', so existing CRLF pairs are left alone.
    linesep = '\r\n'
    blinesep = b'\r\n'
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
3942
Ezio Melottib3aedd42010-11-20 19:04:17 +00003943
class TestBase64(unittest.TestCase):
    # Tests for the email.base64mime helper functions.

    def test_len(self):
        eq = self.assertEqual
        eq(base64mime.header_length('hello'),
           len(base64mime.body_encode(b'hello', eol='')))
        # Base64 emits four characters for every (possibly partial) group
        # of three input bytes, i.e. 4 * ceil(size / 3).
        for size in range(15):
            bsize = 4 * ((size + 2) // 3)
            eq(base64mime.header_length('x' * size), bsize)

    def test_decode(self):
        eq = self.assertEqual
        eq(base64mime.decode(''), b'')
        eq(base64mime.decode('aGVsbG8='), b'hello')

    def test_encode(self):
        eq = self.assertEqual
        # Empty input must encode to an empty result.  Only emptiness is
        # checked: every other result in this test is a str, so pinning
        # the empty value to bytes would test an accident, not a contract.
        eq(len(base64mime.body_encode(b'')), 0)
        eq(base64mime.body_encode(b'hello'), 'aGVsbG8=\n')
        # Test the binary flag
        eq(base64mime.body_encode(b'hello\n'), 'aGVsbG8K\n')
        # Test the maxlinelen arg: 40 output characters hold 30 input
        # bytes, so the 100 input bytes give three full lines and a rest.
        full_line = 'eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg'
        last_line = 'eHh4eCB4eHh4IA=='
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40),
           (full_line + '\n') * 3 + last_line + '\n')
        # Test the eol argument
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           (full_line + '\r\n') * 3 + last_line + '\r\n')

    def test_header_encode(self):
        eq = self.assertEqual
        he = base64mime.header_encode
        eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        eq(he('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # Test the charset option
        eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=')
        eq(he('hello\nworld', charset='iso-8859-2'),
           '=?iso-8859-2?b?aGVsbG8Kd29ybGQ=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003994
3995
Ezio Melottib3aedd42010-11-20 19:04:17 +00003996
class TestQuopri(unittest.TestCase):
    # Tests for the email.quoprimime helper functions.

    def setUp(self):
        # Set of characters (as byte integers) that don't need to be encoded
        # in headers.
        self.hlit = list(chain(
            range(ord('a'), ord('z') + 1),
            range(ord('A'), ord('Z') + 1),
            range(ord('0'), ord('9') + 1),
            b'!*+-/'))
        # Set of characters (as byte integers) that do need to be encoded in
        # headers.
        self.hnon = [c for c in range(256) if c not in self.hlit]
        # A unittest assertion is used rather than a bare assert so the
        # sanity check still runs under python -O.
        self.assertEqual(len(self.hlit) + len(self.hnon), 256)
        # Set of characters (as byte integers) that don't need to be encoded
        # in bodies.
        self.blit = list(range(ord(' '), ord('~') + 1))
        self.blit.append(ord('\t'))
        self.blit.remove(ord('='))
        # Set of characters (as byte integers) that do need to be encoded in
        # bodies.
        self.bnon = [c for c in range(256) if c not in self.blit]
        self.assertEqual(len(self.blit) + len(self.bnon), 256)

    def test_quopri_header_check(self):
        for c in self.hlit:
            self.assertFalse(quoprimime.header_check(c),
                        'Should not be header quopri encoded: %s' % chr(c))
        for c in self.hnon:
            self.assertTrue(quoprimime.header_check(c),
                            'Should be header quopri encoded: %s' % chr(c))

    def test_quopri_body_check(self):
        for c in self.blit:
            self.assertFalse(quoprimime.body_check(c),
                        'Should not be body quopri encoded: %s' % chr(c))
        for c in self.bnon:
            self.assertTrue(quoprimime.body_check(c),
                            'Should be body quopri encoded: %s' % chr(c))

    def test_header_quopri_len(self):
        eq = self.assertEqual
        eq(quoprimime.header_length(b'hello'), 5)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
           quoprimime.header_length(b'hello') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
           quoprimime.header_length(b'h@e@l@l@o@') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        for c in self.hlit:
            eq(quoprimime.header_length(bytes([c])), 1,
               'expected length 1 for %r' % chr(c))
        for c in self.hnon:
            # Space is special; it's encoded to _
            if c == ord(' '):
                continue
            eq(quoprimime.header_length(bytes([c])), 3,
               'expected length 3 for %r' % chr(c))
        eq(quoprimime.header_length(b' '), 1)

    def test_body_quopri_len(self):
        eq = self.assertEqual
        for c in self.blit:
            eq(quoprimime.body_length(bytes([c])), 1)
        for c in self.bnon:
            eq(quoprimime.body_length(bytes([c])), 3)

    def test_quote_unquote_idempotent(self):
        for x in range(256):
            c = chr(x)
            self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)

    def _test_header_encode(self, header, expected_encoded_header, charset=None):
        # With charset=None the call relies on header_encode's own default.
        if charset is None:
            encoded_header = quoprimime.header_encode(header)
        else:
            encoded_header = quoprimime.header_encode(header, charset)
        self.assertEqual(encoded_header, expected_encoded_header)

    def test_header_encode_null(self):
        self._test_header_encode(b'', '')

    def test_header_encode_one_word(self):
        self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=')

    def test_header_encode_two_lines(self):
        self._test_header_encode(b'hello\nworld',
                                '=?iso-8859-1?q?hello=0Aworld?=')

    def test_header_encode_non_ascii(self):
        self._test_header_encode(b'hello\xc7there',
                                '=?iso-8859-1?q?hello=C7there?=')

    def test_header_encode_alt_charset(self):
        self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
                charset='iso-8859-2')

    def _test_header_decode(self, encoded_header, expected_decoded_header):
        decoded_header = quoprimime.header_decode(encoded_header)
        self.assertEqual(decoded_header, expected_decoded_header)

    def test_header_decode_null(self):
        self._test_header_decode('', '')

    def test_header_decode_one_word(self):
        self._test_header_decode('hello', 'hello')

    def test_header_decode_two_lines(self):
        self._test_header_decode('hello=0Aworld', 'hello\nworld')

    def test_header_decode_non_ascii(self):
        self._test_header_decode('hello=C7there', 'hello\xc7there')

    def test_header_decode_re_bug_18380(self):
        # Issue 18380: Call re.sub with a positional argument for flags in the wrong position
        self.assertEqual(quoprimime.header_decode('=30' * 257), '0' * 257)

    def _test_decode(self, encoded, expected_decoded, eol=None):
        # With eol=None the call relies on decode's own default.
        if eol is None:
            decoded = quoprimime.decode(encoded)
        else:
            decoded = quoprimime.decode(encoded, eol=eol)
        self.assertEqual(decoded, expected_decoded)

    def test_decode_null_word(self):
        self._test_decode('', '')

    def test_decode_null_line_null_word(self):
        self._test_decode('\r\n', '\n')

    def test_decode_one_word(self):
        self._test_decode('hello', 'hello')

    def test_decode_one_word_eol(self):
        self._test_decode('hello', 'hello', eol='X')

    def test_decode_one_line(self):
        self._test_decode('hello\r\n', 'hello\n')

    def test_decode_one_line_lf(self):
        self._test_decode('hello\n', 'hello\n')

    def test_decode_one_line_cr(self):
        self._test_decode('hello\r', 'hello\n')

    def test_decode_one_line_nl(self):
        self._test_decode('hello\n', 'helloX', eol='X')

    def test_decode_one_line_crnl(self):
        self._test_decode('hello\r\n', 'helloX', eol='X')

    def test_decode_one_line_one_word(self):
        self._test_decode('hello\r\nworld', 'hello\nworld')

    def test_decode_one_line_one_word_eol(self):
        self._test_decode('hello\r\nworld', 'helloXworld', eol='X')

    def test_decode_two_lines(self):
        self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n')

    def test_decode_two_lines_eol(self):
        self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X')

    def test_decode_one_long_line(self):
        self._test_decode('Spam' * 250, 'Spam' * 250)

    def test_decode_one_space(self):
        self._test_decode(' ', '')

    def test_decode_multiple_spaces(self):
        self._test_decode(' ' * 5, '')

    def test_decode_one_line_trailing_spaces(self):
        self._test_decode('hello    \r\n', 'hello\n')

    def test_decode_two_lines_trailing_spaces(self):
        self._test_decode('hello    \r\nworld   \r\n', 'hello\nworld\n')

    def test_decode_quoted_word(self):
        self._test_decode('=22quoted=20words=22', '"quoted words"')

    def test_decode_uppercase_quoting(self):
        self._test_decode('ab=CD=EF', 'ab\xcd\xef')

    def test_decode_lowercase_quoting(self):
        self._test_decode('ab=cd=ef', 'ab\xcd\xef')

    def test_decode_soft_line_break(self):
        self._test_decode('soft line=\r\nbreak', 'soft linebreak')

    def test_decode_false_quoting(self):
        self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2')

    def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
        # Only arguments given explicitly are forwarded, so the defaults of
        # body_encode itself are exercised otherwise.  The local values
        # mirror those defaults for the line-length check below.
        kwargs = {}
        if maxlinelen is None:
            # Use body_encode's default.
            maxlinelen = 76
        else:
            kwargs['maxlinelen'] = maxlinelen
        if eol is None:
            # Use body_encode's default.
            eol = '\n'
        else:
            kwargs['eol'] = eol
        encoded_body = quoprimime.body_encode(body, **kwargs)
        self.assertEqual(encoded_body, expected_encoded_body)
        if eol == '\n' or eol == '\r\n':
            # We know how to split the result back into lines, so maxlinelen
            # can be checked.
            for line in encoded_body.splitlines():
                self.assertLessEqual(len(line), maxlinelen)

    def test_encode_null(self):
        self._test_encode('', '')

    def test_encode_null_lines(self):
        self._test_encode('\n\n', '\n\n')

    def test_encode_one_line(self):
        self._test_encode('hello\n', 'hello\n')

    def test_encode_one_line_crlf(self):
        self._test_encode('hello\r\n', 'hello\n')

    def test_encode_one_line_eol(self):
        self._test_encode('hello\n', 'hello\r\n', eol='\r\n')

    def test_encode_one_space(self):
        self._test_encode(' ', '=20')

    def test_encode_one_line_one_space(self):
        self._test_encode(' \n', '=20\n')

# XXX: body_encode() expects strings, but uses ord(char) from these strings
# to index into a 256-entry list.  For code points above 255, this will fail.
# Should there be a check for 8-bit only ord() values in body, or at least
# a comment about the expected input?

    def test_encode_two_lines_one_space(self):
        self._test_encode(' \n \n', '=20\n=20\n')

    def test_encode_one_word_trailing_spaces(self):
        # Only the last space of the run is quoted; the ones before it
        # are passed through literally.
        self._test_encode('hello    ', 'hello   =20')

    def test_encode_one_line_trailing_spaces(self):
        self._test_encode('hello    \n', 'hello   =20\n')

    def test_encode_one_word_trailing_tab(self):
        self._test_encode('hello \t', 'hello =09')

    def test_encode_one_line_trailing_tab(self):
        self._test_encode('hello \t\n', 'hello =09\n')

    def test_encode_trailing_space_before_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6)

    def test_encode_trailing_space_at_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5)

    def test_encode_trailing_space_beyond_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4)

    def test_encode_whitespace_lines(self):
        self._test_encode(' \n' * 5, '=20\n' * 5)

    def test_encode_quoted_equals(self):
        self._test_encode('a = b', 'a =3D b')

    def test_encode_one_long_string(self):
        self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25)

    def test_encode_one_long_line(self):
        self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')

    def test_encode_one_very_long_line(self):
        self._test_encode('x' * 200 + '\n',
                2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n')

    def test_encode_shortest_maxlinelen(self):
        self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4)

    def test_encode_maxlinelen_too_small(self):
        self.assertRaises(ValueError, self._test_encode, '', '', maxlinelen=3)

    def test_encode(self):
        eq = self.assertEqual
        eq(quoprimime.body_encode(''), '')
        eq(quoprimime.body_encode('hello'), 'hello')
        # Test the binary flag
        eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
        # Test the maxlinelen arg
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40),
           'xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\n'
           ' xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\n'
           'x xxxx xxxx xxxx xxxx=20')
        # Test the eol argument
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           'xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r\n'
           ' xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r\n'
           'x xxxx xxxx xxxx xxxx=20')
        eq(quoprimime.body_encode('one line\n\ntwo line'),
           'one line\n\ntwo line')
4310
4311
Ezio Melottib3aedd42010-11-20 19:04:17 +00004312
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004313# Test the Charset class
class TestCharset(unittest.TestCase):
    def tearDown(self):
        # test_body_encode registers a throwaway 'fake' charset; remove it
        # again, tolerating the case where it was never added.
        from email import charset as CharsetModule
        CharsetModule.CHARSETS.pop('fake', None)

    def test_codec_encodeable(self):
        check = self.assertEqual
        # Make sure us-ascii = no Unicode conversion
        ascii_cs = Charset('us-ascii')
        check(ascii_cs.header_encode('Hello World!'), 'Hello World!')
        # Test 8-bit idempotency with us-ascii
        eight_bit = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
        self.assertRaises(UnicodeError, ascii_cs.header_encode, eight_bit)
        utf8_cs = Charset('utf-8')
        check(utf8_cs.header_encode(eight_bit),
              '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')

    def test_body_encode(self):
        check = self.assertEqual
        # Try a charset with QP body encoding
        cs = Charset('iso-8859-1')
        check('hello w=F6rld', cs.body_encode('hello w\xf6rld'))
        # Try a charset with Base64 body encoding
        cs = Charset('utf-8')
        check('aGVsbG8gd29ybGQ=\n', cs.body_encode(b'hello world'))
        # Try a charset with None body encoding
        cs = Charset('us-ascii')
        check('hello world', cs.body_encode('hello world'))
        # Try the convert argument, where input codec != output codec
        cs = Charset('euc-jp')
        # With apologies to Tokio Kikuchi ;)
        # XXX FIXME
##         try:
##             eq('\x1b$B5FCO;~IW\x1b(B',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
##             eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
##         except LookupError:
##             # We probably don't have the Japanese codecs installed
##             pass
        # Testing SF bug #625509, which we have to fake, since there are no
        # built-in encodings where the header encoding is QP but the body
        # encoding is not.
        from email import charset as CharsetModule
        CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8')
        cs = Charset('fake')
        check('hello world', cs.body_encode('hello world'))

    def test_unicode_charset_name(self):
        ascii_cs = Charset('us-ascii')
        self.assertEqual(str(ascii_cs), 'us-ascii')
        self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
4368
4369
Ezio Melottib3aedd42010-11-20 19:04:17 +00004370
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004371# Test multilingual MIME headers.
4372class TestHeader(TestEmailBase):
4373 def test_simple(self):
4374 eq = self.ndiffAssertEqual
4375 h = Header('Hello World!')
4376 eq(h.encode(), 'Hello World!')
4377 h.append(' Goodbye World!')
4378 eq(h.encode(), 'Hello World! Goodbye World!')
4379
4380 def test_simple_surprise(self):
4381 eq = self.ndiffAssertEqual
4382 h = Header('Hello World!')
4383 eq(h.encode(), 'Hello World!')
4384 h.append('Goodbye World!')
4385 eq(h.encode(), 'Hello World! Goodbye World!')
4386
4387 def test_header_needs_no_decoding(self):
4388 h = 'no decoding needed'
4389 self.assertEqual(decode_header(h), [(h, None)])
4390
4391 def test_long(self):
4392 h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
4393 maxlinelen=76)
4394 for l in h.encode(splitchars=' ').split('\n '):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00004395 self.assertTrue(len(l) <= 76)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004396
4397 def test_multilingual(self):
4398 eq = self.ndiffAssertEqual
4399 g = Charset("iso-8859-1")
4400 cz = Charset("iso-8859-2")
4401 utf8 = Charset("utf-8")
4402 g_head = (b'Die Mieter treten hier ein werden mit einem '
4403 b'Foerderband komfortabel den Korridor entlang, '
4404 b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
4405 b'gegen die rotierenden Klingen bef\xf6rdert. ')
4406 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
4407 b'd\xf9vtipu.. ')
4408 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
4409 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
4410 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
4411 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
4412 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
4413 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
4414 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
4415 '\u3044\u307e\u3059\u3002')
4416 h = Header(g_head, g)
4417 h.append(cz_head, cz)
4418 h.append(utf8_head, utf8)
Guido van Rossum9604e662007-08-30 03:46:43 +00004419 enc = h.encode(maxlinelen=76)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004420 eq(enc, """\
Guido van Rossum9604e662007-08-30 03:46:43 +00004421=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
4422 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
4423 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
4424 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004425 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
4426 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
4427 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
4428 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
Guido van Rossum9604e662007-08-30 03:46:43 +00004429 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
4430 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
4431 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
4432 decoded = decode_header(enc)
4433 eq(len(decoded), 3)
4434 eq(decoded[0], (g_head, 'iso-8859-1'))
4435 eq(decoded[1], (cz_head, 'iso-8859-2'))
4436 eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004437 ustr = str(h)
Guido van Rossum9604e662007-08-30 03:46:43 +00004438 eq(ustr,
4439 (b'Die Mieter treten hier ein werden mit einem Foerderband '
4440 b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
4441 b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
4442 b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
4443 b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
4444 b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
4445 b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
4446 b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
4447 b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
4448 b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
4449 b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
4450 b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
4451 b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
4452 b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
4453 b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
4454 b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
4455 ).decode('utf-8'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004456 # Test make_header()
4457 newh = make_header(decode_header(enc))
Guido van Rossum9604e662007-08-30 03:46:43 +00004458 eq(newh, h)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004459
4460 def test_empty_header_encode(self):
4461 h = Header()
4462 self.assertEqual(h.encode(), '')
Barry Warsaw8b3d6592007-08-30 02:10:49 +00004463
def test_header_ctor_default_args(self):
    # A default-constructed Header compares equal to '' and, after one
    # chunk is appended with an explicit Charset, to that chunk's text.
    check = self.ndiffAssertEqual
    header = Header()
    check(header, '')
    latin1 = Charset('iso-8859-1')
    header.append('foo', latin1)
    check(header, 'foo')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004470
def test_explicit_maxlinelen(self):
    # Check where a long unencoded header is folded for three setups:
    # defaults, a header_name, and an explicit large maxlinelen.
    check = self.ndiffAssertEqual
    text = ('A very long line that must get split to something other '
            'than at the 76th character boundary to test the non-default '
            'behavior')

    # No header name.
    plain = Header(text)
    check(plain.encode(),
          'A very long line that must get split to something other '
          'than at the 76th\n'
          ' character boundary to test the non-default behavior')
    check(str(plain), text)

    # With a header name the fold point moves one word earlier.
    named = Header(text, header_name='Subject')
    check(named.encode(),
          'A very long line that must get split to something other '
          'than at the\n'
          ' 76th character boundary to test the non-default behavior')
    check(str(named), text)

    # With maxlinelen=1024 the value is emitted on a single line.
    wide = Header(text, maxlinelen=1024, header_name='Subject')
    check(wide.encode(), text)
    check(str(wide), text)
4489
def test_quopri_splittable(self):
    # Encode the same 100-character value with a q-encoded charset at two
    # different maxlinelen settings, check the exact folding, and check that
    # decoding the folded form gives back the original text.
    eq = self.ndiffAssertEqual
    x = 'xxxx ' * 20

    h = Header(charset='iso-8859-1', maxlinelen=20)
    h.append(x)
    s = h.encode()
    eq(s, """\
=?iso-8859-1?q?xxx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_?=""")
    eq(x, str(make_header(decode_header(s))))

    # Same value, wider lines.  Reuse x so the round-trip check below
    # compares against exactly what was appended.
    h = Header(charset='iso-8859-1', maxlinelen=40)
    h.append(x)
    s = h.encode()
    eq(s, """\
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
 =?iso-8859-1?q?_xxxx_xxxx_?=""")
    eq(x, str(make_header(decode_header(s))))
4558
def test_base64_splittable(self):
    # Encode a 100-character value with a b-encoded charset at two
    # maxlinelen settings; verify the exact folding and the round trip.
    check = self.ndiffAssertEqual
    text = 'xxxx ' * 20

    narrow = Header(charset='koi8-r', maxlinelen=20)
    narrow.append(text)
    folded = narrow.encode()
    check(folded, """\
=?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IA==?=""")
    check(text, str(make_header(decode_header(folded))))

    wide = Header(charset='koi8-r', maxlinelen=40)
    wide.append(text)
    folded = wide.encode()
    check(folded, """\
=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
    check(text, str(make_header(decode_header(folded))))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004612
4613 def test_us_ascii_header(self):
4614 eq = self.assertEqual
4615 s = 'hello'
4616 x = decode_header(s)
4617 eq(x, [('hello', None)])
4618 h = make_header(x)
4619 eq(s, h.encode())
4620
4621 def test_string_charset(self):
4622 eq = self.assertEqual
4623 h = Header()
4624 h.append('hello', 'iso-8859-1')
Guido van Rossum9604e662007-08-30 03:46:43 +00004625 eq(h, 'hello')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004626
4627## def test_unicode_error(self):
4628## raises = self.assertRaises
4629## raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
4630## raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
4631## h = Header()
4632## raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
4633## raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
4634## raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')
4635
4636 def test_utf8_shortest(self):
4637 eq = self.assertEqual
4638 h = Header('p\xf6stal', 'utf-8')
4639 eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
4640 h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
4641 eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')
4642
4643 def test_bad_8bit_header(self):
4644 raises = self.assertRaises
4645 eq = self.assertEqual
4646 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4647 raises(UnicodeError, Header, x)
4648 h = Header()
4649 raises(UnicodeError, h.append, x)
4650 e = x.decode('utf-8', 'replace')
4651 eq(str(Header(x, errors='replace')), e)
4652 h.append(x, errors='replace')
4653 eq(str(h), e)
4654
R David Murray041015c2011-03-25 15:10:55 -04004655 def test_escaped_8bit_header(self):
4656 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
R David Murray6bdb1762011-06-18 12:30:55 -04004657 e = x.decode('ascii', 'surrogateescape')
4658 h = Header(e, charset=email.charset.UNKNOWN8BIT)
R David Murray041015c2011-03-25 15:10:55 -04004659 self.assertEqual(str(h),
4660 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
4661 self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])
4662
R David Murraye5e366c2011-06-18 12:57:28 -04004663 def test_header_handles_binary_unknown8bit(self):
4664 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4665 h = Header(x, charset=email.charset.UNKNOWN8BIT)
4666 self.assertEqual(str(h),
4667 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
4668 self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])
4669
4670 def test_make_header_handles_binary_unknown8bit(self):
4671 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4672 h = Header(x, charset=email.charset.UNKNOWN8BIT)
4673 h2 = email.header.make_header(email.header.decode_header(h))
4674 self.assertEqual(str(h2),
4675 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
4676 self.assertEqual(email.header.decode_header(h2), [(x, 'unknown-8bit')])
4677
R David Murray041015c2011-03-25 15:10:55 -04004678 def test_modify_returned_list_does_not_change_header(self):
4679 h = Header('test')
4680 chunks = email.header.decode_header(h)
4681 chunks.append(('ascii', 'test2'))
4682 self.assertEqual(str(h), 'test')
4683
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004684 def test_encoded_adjacent_nonencoded(self):
4685 eq = self.assertEqual
4686 h = Header()
4687 h.append('hello', 'iso-8859-1')
4688 h.append('world')
4689 s = h.encode()
4690 eq(s, '=?iso-8859-1?q?hello?= world')
4691 h = make_header(decode_header(s))
4692 eq(h.encode(), s)
4693
R David Murray07ea53c2012-06-02 17:56:49 -04004694 def test_whitespace_keeper(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004695 eq = self.assertEqual
4696 s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
4697 parts = decode_header(s)
R David Murray07ea53c2012-06-02 17:56:49 -04004698 eq(parts, [(b'Subject: ', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b' zz.', None)])
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004699 hdr = make_header(parts)
4700 eq(hdr.encode(),
4701 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')
4702
4703 def test_broken_base64_header(self):
4704 raises = self.assertRaises
R. David Murrayc4e69cc2010-08-03 22:14:10 +00004705 s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004706 raises(errors.HeaderParseError, decode_header, s)
4707
R. David Murray477efb32011-01-05 01:39:32 +00004708 def test_shift_jis_charset(self):
4709 h = Header('文', charset='shift_jis')
4710 self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')
4711
R David Murrayde912762011-03-16 18:26:23 -04004712 def test_flatten_header_with_no_value(self):
4713 # Issue 11401 (regression from email 4.x) Note that the space after
4714 # the header doesn't reflect the input, but this is also the way
4715 # email 4.x behaved. At some point it would be nice to fix that.
4716 msg = email.message_from_string("EmptyHeader:")
4717 self.assertEqual(str(msg), "EmptyHeader: \n\n")
4718
R David Murray01581ee2011-04-18 10:04:34 -04004719 def test_encode_preserves_leading_ws_on_value(self):
4720 msg = Message()
4721 msg['SomeHeader'] = ' value with leading ws'
4722 self.assertEqual(str(msg), "SomeHeader: value with leading ws\n\n")
4723
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004724
Ezio Melottib3aedd42010-11-20 19:04:17 +00004725
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004726# Test RFC 2231 header parameters (en/de)coding
class TestRFC2231(TestEmailBase):
    # Tests for RFC 2231 parameter handling through Message.get_param(),
    # set_param(), del_param(), get_filename(), get_boundary() and
    # get_content_charset().  The "test_headerregistry..." comments name the
    # corresponding tests for the newer header registry API.

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes
    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes
    def test_get_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_29.txt')
        eq(msg.get_param('title'),
           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        eq(msg.get_param('title', unquote=False),
           ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))

    def test_set_param(self):
        eq = self.ndiffAssertEqual
        msg = Message()
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii')
        eq(msg.get_param('title'),
           ('us-ascii', '', 'This is even more ***fun*** isn\'t it!'))
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        eq(msg.get_param('title'),
           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        msg = self._msgobj('msg_01.txt')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    def test_set_param_requote(self):
        msg = Message()
        msg.set_param('title', 'foo')
        self.assertEqual(msg['content-type'], 'text/plain; title="foo"')
        msg.set_param('title', 'bar', requote=False)
        self.assertEqual(msg['content-type'], 'text/plain; title=bar')
        # tspecial is still quoted.
        msg.set_param('title', "(bar)bell", requote=False)
        self.assertEqual(msg['content-type'], 'text/plain; title="(bar)bell"')

    def test_del_param(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        msg.set_param('foo', 'bar', charset='us-ascii', language='en')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        msg.del_param('foo', header='Content-Type')
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_charset
    # I changed the charset name, though, because the one in the file isn't
    # a legal charset name.  Should add a test for an illegal charset.
    def test_rfc2231_get_content_charset(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_32.txt')
        eq(msg.get_content_charset(), 'us-ascii')

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_double_quotes
    def test_rfc2231_parse_rfc_quoting(self):
        m = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*=''This%20is%20even%20more%20;
            \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        # The message must also flatten back to exactly the input text.
        self.assertEqual(m, msg.as_string())

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes
    def test_rfc2231_parse_extra_quoting(self):
        m = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*="''This%20is%20even%20more%20";
            \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        # The message must also flatten back to exactly the input text.
        self.assertEqual(m, msg.as_string())

    # test_headerregistry.TestContentTypeHeader.rfc2231_no_language_or_charset
    # but new test uses *0* because otherwise lang/charset is not valid.
    # test_headerregistry.TestContentTypeHeader.rfc2231_segmented_normal_values
    def test_rfc2231_no_language_or_charset(self):
        m = '''\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm

'''
        msg = email.message_from_string(m)
        param = msg.get_param('NAME')
        # Unencoded segments come back as a plain string, not a
        # (charset, language, value) tuple.
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(
            param,
            'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_charset
    def test_rfc2231_no_language_or_charset_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    # Duplicate of previous test?
    def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    # test_headerregistry.TestContentTypeHeader.rfc2231_partly_encoded,
    # but the test below is wrong (the first part should be decoded).
    def test_rfc2231_partly_encoded(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20***fun*** is it not.pdf')

    def test_rfc2231_partly_nonencoded(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0="This%20is%20even%20more%20";
\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_boundary(self):
        m = '''\
Content-Type: multipart/alternative;
\tboundary*0*="''This%20is%20even%20more%20";
\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tboundary*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_boundary(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_charset(self):
        # This is a nonsensical charset value, but tests the code anyway
        m = '''\
Content-Type: text/plain;
\tcharset*0*="This%20is%20even%20more%20";
\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tcharset*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_content_charset(),
                         'this is even more ***fun*** is it not.pdf')

    # test_headerregistry.TestContentTypeHeader.rfc2231_unknown_charset_treated_as_ascii
    def test_rfc2231_bad_encoding_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_charset(self):
        m = """\
Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D

"""
        msg = email.message_from_string(m)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertIsNone(msg.get_content_charset())

    def test_rfc2231_bad_character_in_charset(self):
        m = """\
Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D

"""
        msg = email.message_from_string(m)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertIsNone(msg.get_content_charset())

    def test_rfc2231_bad_character_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2*="is it not.pdf%E2"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf\ufffd')

    def test_rfc2231_unknown_encoding(self):
        m = """\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt

"""
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(), 'myfile.txt')

    def test_rfc2231_single_tick_in_filename_extended(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        self.assertIsNone(charset)
        self.assertIsNone(language)
        eq(s, "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes
    def test_rfc2231_single_tick_in_filename(self):
        m = """\
Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(m)
        param = msg.get_param('name')
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(param, "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_value_with_charset_and_lang
    def test_rfc2231_tick_attack_extended(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_non_encoded_value
    def test_rfc2231_tick_attack(self):
        m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(m)
        param = msg.get_param('name')
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(param, "us-ascii'en-us'Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quotes_inside_quotes
    def test_rfc2231_no_extended_values(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo; name=\"Frank's Document\"

"""
        msg = email.message_from_string(m)
        eq(msg.get_param('name'), "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_then_unencoded_segments
    def test_rfc2231_encoded_then_unencoded_segments(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'My\";
\tname*1=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, 'My Document For You')

    # test_headerregistry.TestContentTypeHeader.rfc2231_unencoded_then_encoded_segments
    # test_headerregistry.TestContentTypeHeader.rfc2231_quoted_unencoded_then_encoded_segments
    def test_rfc2231_unencoded_then_encoded_segments(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'My\";
\tname*1*=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, 'My Document For You')
5094
5095
Ezio Melottib3aedd42010-11-20 19:04:17 +00005096
R. David Murraya8f480f2010-01-16 18:30:03 +00005097# Tests to ensure that signed parts of an email are completely preserved, as
5098# required by RFC1847 section 2.1. Note that these are incomplete, because the
5099# email package does not currently always preserve the body. See issue 1670765.
class TestSigned(TestEmailBase):
    # Each test parses a message file, regenerates it, and checks that the
    # first MIME part of the output is identical to that of the input.

    # Captures the text between a "--boundary" line and the next line that
    # starts with the same "--boundary"; group 2 is the part itself.
    # Compiled once here rather than on every comparison.
    _first_part_re = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)

    def _msg_and_obj(self, filename):
        # Return (raw text, parsed Message) for the named test data file.
        with openfile(filename) as fp:
            original = fp.read()
        msg = email.message_from_string(original)
        return original, msg

    def _signed_parts_eq(self, original, result):
        # Extract the first mime part of each message and compare them.
        # A missing match is reported as a test failure with a clear
        # message rather than as an AttributeError on None.
        in_match = self._first_part_re.search(original)
        self.assertIsNotNone(in_match, 'no MIME part found in original')
        out_match = self._first_part_re.search(result)
        self.assertIsNotNone(out_match, 'no MIME part found in result')
        self.assertEqual(out_match.group(2), in_match.group(2))

    def test_long_headers_as_string(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)
5132
5133
Ezio Melottib3aedd42010-11-20 19:04:17 +00005134
# Run the tests in this module when it is executed as a script.
if __name__ == '__main__':
    unittest.main()