blob: 3507b1e46493a3ee3437d4bb72027a0dde32b1f6 [file] [log] [blame]
# Copyright (C) 2001-2010 Python Software Foundation
# Contact: email-sig@python.org
# email package unit tests
4
R. David Murray719a4492010-11-21 16:53:48 +00005import re
Guido van Rossum8b3febe2007-08-30 01:15:14 +00006import time
7import base64
Guido van Rossum8b3febe2007-08-30 01:15:14 +00008import unittest
R. David Murray96fd54e2010-10-08 15:55:28 +00009import textwrap
Guido van Rossum8b3febe2007-08-30 01:15:14 +000010
R. David Murray96fd54e2010-10-08 15:55:28 +000011from io import StringIO, BytesIO
Guido van Rossum8b3febe2007-08-30 01:15:14 +000012from itertools import chain
13
14import email
R David Murrayc27e5222012-05-25 15:01:48 -040015import email.policy
Guido van Rossum8b3febe2007-08-30 01:15:14 +000016
17from email.charset import Charset
18from email.header import Header, decode_header, make_header
19from email.parser import Parser, HeaderParser
R David Murray638d40b2012-08-24 11:14:13 -040020from email.generator import Generator, DecodedGenerator, BytesGenerator
Guido van Rossum8b3febe2007-08-30 01:15:14 +000021from email.message import Message
22from email.mime.application import MIMEApplication
23from email.mime.audio import MIMEAudio
24from email.mime.text import MIMEText
25from email.mime.image import MIMEImage
26from email.mime.base import MIMEBase
27from email.mime.message import MIMEMessage
28from email.mime.multipart import MIMEMultipart
29from email import utils
30from email import errors
31from email import encoders
32from email import iterators
33from email import base64mime
34from email import quoprimime
35
R David Murray965794e2013-03-07 18:16:47 -050036from test.support import unlink
R David Murraya256bac2011-03-31 12:20:23 -040037from test.test_email import openfile, TestEmailBase
Guido van Rossum8b3febe2007-08-30 01:15:14 +000038
# These imports are documented to work, but we are testing them using a
# different path, so we import them here just to make sure they are importable.
41from email.parser import FeedParser, BytesFeedParser
42
# Shared string constants for the tests in this module: a newline for
# joining lines, an empty string, and a single space.
NL = '\n'
EMPTYSTRING = ''
SPACE = ' '
46
47
# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    # Exercises the public API of email.message.Message: header access,
    # charset handling, parameter get/set/delete, content-type queries,
    # payload decoding and flattening back to str/bytes.
    #
    # Tests that call self._msgobj() or openfile() read sample messages
    # (msg_NN.txt) supplied by the test package; the expected values below
    # are tied to the contents of those files.
    #
    # Comments are used instead of docstrings so that unittest's verbose
    # output keeps showing the test method names.

    def test_get_all(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        # A header that is absent yields the caller-supplied default.
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        self.assertIsNone(msg.get_charset())
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        self.assertIsNone(msg.get_charset())
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        # The pre-existing headers must be kept, with only the charset
        # parameter added to Content-Type.
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_get_charsets(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        # With a failobj, parts lacking a charset report it instead of None.
        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_make_boundary(self):
        msg = MIMEMultipart('form-data')
        # Note that when the boundary gets created is an implementation
        # detail and might change.
        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
        # Trigger creation of boundary
        msg.as_string()
        # Only the fixed 33-character prefix is compared; the rest of the
        # boundary is generated.
        self.assertEqual(msg.items()[0][1][:33],
                         'multipart/form-data; boundary="==')
        # XXX: there ought to be tests of the uniqueness of the boundary, too.

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        with openfile('msg_46.txt') as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, mangle_from_=True, maxheaderlen=0)
        gen.flatten(msg, unixfrom=False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_byte_message_rfc822_only(self):
        # Make sure new bytes header parser also passes this.
        with openfile('msg_46.txt', 'rb') as fp:
            msgdata = fp.read()
        parser = email.parser.BytesHeaderParser()
        msg = parser.parsebytes(msgdata)
        out = BytesIO()
        gen = email.generator.BytesGenerator(out)
        gen.flatten(msg)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        self.assertIsNone(msg.get_payload(decode=True))
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            # Assigning a header appends a new one instead of replacing,
            # and lookups find the earliest one, so the previous value has
            # to be removed first; otherwise only 'x-uuencode' would ever
            # be exercised.  Deleting a missing header is a no-op.
            del msg['content-transfer-encoding']
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_get_payload_n_raises_on_non_multipart(self):
        msg = Message()
        self.assertRaises(TypeError, msg.get_payload, 1)

    def test_decoded_generator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        for name in ('from', 'From', 'FROM', 'to', 'To', 'TO'):
            self.assertIn(name, msg)

    def test_as_string(self):
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        self.assertEqual(text, str(msg))
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        # With unixfrom=True the envelope line comes first; everything
        # after it must match the file text.
        self.assertTrue(lines[0].startswith('From '))
        self.assertEqual(text, NL.join(lines[1:]))

    def test_as_string_policy(self):
        msg = self._msgobj('msg_01.txt')
        newpolicy = msg.policy.clone(linesep='\r\n')
        fullrepr = msg.as_string(policy=newpolicy)
        s = StringIO()
        g = Generator(s, policy=newpolicy)
        g.flatten(msg)
        self.assertEqual(fullrepr, s.getvalue())

    def test_as_bytes(self):
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt', 'rb') as fp:
            data = fp.read()
        self.assertEqual(data, bytes(msg))
        fullrepr = msg.as_bytes(unixfrom=True)
        lines = fullrepr.split(b'\n')
        self.assertTrue(lines[0].startswith(b'From '))
        self.assertEqual(data, b'\n'.join(lines[1:]))

    def test_as_bytes_policy(self):
        msg = self._msgobj('msg_01.txt')
        newpolicy = msg.policy.clone(linesep='\r\n')
        fullrepr = msg.as_bytes(policy=newpolicy)
        s = BytesIO()
        g = BytesGenerator(s, policy=newpolicy)
        g.flatten(msg)
        self.assertEqual(fullrepr, s.getvalue())

    # test_headerregistry.TestContentTypeHeader.bad_params
    def test_bad_param(self):
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_filename())

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_boundary())

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        # No header argument means Content-Type, which this message lacks.
        self.assertIsNone(msg.get_params())
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    # test_headerregistry.TestContentTypeHeader.spaces_around_param_equals
    def test_get_param_liberal(self):
        msg = Message()
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        self.assertIsNone(msg.get_param('quuz', header='x-header'))
        self.assertIsNone(msg.get_param('quuz'))
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    # test_headerregistry.TestContentTypeHeader.spaces_around_semis
    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    # test_headerregistry.TestContentTypeHeader.semis_inside_quotes
    def test_get_param_with_semis_in_quotes(self):
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    # test_headerregistry.TestContentTypeHeader.quotes_inside_rfc2231_value
    def test_get_param_with_quotes(self):
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        msg = email.message_from_string('Header: exists')
        for name in ('header', 'Header', 'HEADER'):
            self.assertIn(name, msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        # Re-adding the parameter puts it at the end of the list.
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_del_param_on_nonexistent_header(self):
        msg = Message()
        # Deleting param on empty msg should not raise exception.
        msg.del_param('filename', 'content-disposition')

    def test_del_nonexistent_param(self):
        msg = Message()
        msg.add_header('Content-Type', 'text/plain', charset='utf-8')
        existing_header = msg['Content-Type']
        msg.del_param('foobar', header='Content-Type')
        self.assertEqual(msg['Content-Type'], existing_header)

    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        # A type without a '/' is rejected.
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # Changing the type keeps the existing parameters.
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        # A malformed Content-Type falls back to the text/plain default.
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        # A malformed Content-Type falls back to the text/plain default.
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        # With a duplicated header only the first occurrence is replaced.
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    # test_defect_handling:test_invalid_chars_in_base64_payload
    def test_broken_base64_payload(self):
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         (b'\x03\x00\xe9\xd0\xfe\xff\xff.\x8b\xc0'
                          b'\xa1\x00p\xf6\xbf\xe9\x0f'))
        self.assertIsInstance(msg.defects[0],
                              errors.InvalidBase64CharactersDefect)

    def test_broken_unicode_payload(self):
        # This test improves coverage but is not a compliance test.
        # The behavior in this situation is currently undefined by the API.
        x = 'this is a br\xf6ken thing to do'
        msg = Message()
        msg['content-type'] = 'text/plain'
        msg['content-transfer-encoding'] = '8bit'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    def test_questionable_bytes_payload(self):
        # This test improves coverage but is not a compliance test,
        # since it involves poking inside the black box.
        x = 'this is a quéstionable thing to do'.encode('utf-8')
        msg = Message()
        msg['content-type'] = 'text/plain; charset="utf-8"'
        msg['content-transfer-encoding'] = '8bit'
        msg._payload = x
        self.assertEqual(msg.get_payload(decode=True), x)

    # Issue 1078919
    def test_ascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    def test_noascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer.ppt")
        self.assertEqual(
            'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
            msg['Content-Disposition'])

    def test_nonascii_add_header_via_triple(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fußballer.ppt'))
        self.assertEqual(
            'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
            msg['Content-Disposition'])

    def test_ascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="windows [filename].ppt")
        self.assertEqual(
            'attachment; filename="windows [filename].ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer [filename].ppt")
        self.assertEqual(
            "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
            msg['Content-Disposition'])

    def test_add_header_with_name_only_param(self):
        msg = Message()
        # A None value yields a bare parameter name; the underscore in the
        # keyword becomes a dash.
        msg.add_header('Content-Disposition', 'inline', foo_bar=None)
        self.assertEqual("inline; foo-bar", msg['Content-Disposition'])

    def test_add_header_with_no_value(self):
        msg = Message()
        msg.add_header('X-Status', None)
        self.assertEqual('', msg['X-Status'])

    # Issue 5871: reject an attempt to embed a header inside a header value
    # (header injection attack).
    def test_embeded_header_via_Header_rejected(self):
        msg = Message()
        msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_embeded_header_via_string_rejected(self):
        msg = Message()
        msg['Dummy'] = 'dummy\nX-Injected-Header: test'
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_unicode_header_defaults_to_utf8_encoding(self):
        # Issue 14291
        m = MIMEText('abc\n')
        m['Subject'] = 'É test'
        self.assertEqual(str(m), textwrap.dedent("""\
            Content-Type: text/plain; charset="us-ascii"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 7bit
            Subject: =?utf-8?q?=C3=89_test?=

            abc
            """))

    def test_unicode_body_defaults_to_utf8_encoding(self):
        # Issue 14291
        m = MIMEText('É testabc\n')
        self.assertEqual(str(m), textwrap.dedent("""\
            Content-Type: text/plain; charset="utf-8"
            MIME-Version: 1.0
            Content-Transfer-Encoding: base64

            w4kgdGVzdGFiYwo=
            """))
668
669
# Test the email.encoders module
class TestEncoders(unittest.TestCase):
    # Checks which Content-Transfer-Encoding the MIME classes select and
    # what the encoded body looks like.  Comments rather than docstrings
    # keep unittest's verbose output showing the method names.

    def test_EncodersEncode_base64(self):
        with openfile('PyBanner048.gif', 'rb') as fp:
            raw_image = fp.read()
        encoded_body = MIMEImage(raw_image).get_payload()
        # the transfer-encoded body lines should all be <=76 characters
        longest = max(len(line) for line in encoded_body.split('\n'))
        self.assertLessEqual(longest, 76)

    def test_encode_empty_payload(self):
        message = Message()
        message.set_charset('us-ascii')
        self.assertEqual(message['content-transfer-encoding'], '7bit')

    def test_default_cte(self):
        def cte_of(message):
            return message['content-transfer-encoding']

        # 7bit data and the default us-ascii _charset
        self.assertEqual(cte_of(MIMEText('hello world')), '7bit')
        # Similar, but with 8bit data
        self.assertEqual(cte_of(MIMEText('hello \xf8 world')), 'base64')
        # And now with a different charset
        latin1 = MIMEText('hello \xf8 world', _charset='iso-8859-1')
        self.assertEqual(cte_of(latin1), 'quoted-printable')

    def test_encode7or8bit(self):
        # Make sure a charset whose input character set is 8bit but
        # whose output character set is 7bit gets a transfer-encoding
        # of 7bit.
        message = MIMEText('文', _charset='euc-jp')
        self.assertEqual(message['content-transfer-encoding'], '7bit')

    def test_qp_encode_latin1(self):
        expected = textwrap.dedent("""\
            MIME-Version: 1.0
            Content-Type: text/text; charset="iso-8859-1"
            Content-Transfer-Encoding: quoted-printable

            =E1=F6
            """)
        message = MIMEText('\xe1\xf6\n', 'text', 'ISO-8859-1')
        self.assertEqual(str(message), expected)

    def test_qp_encode_non_latin1(self):
        # Issue 16948
        expected = textwrap.dedent("""\
            MIME-Version: 1.0
            Content-Type: text/text; charset="iso-8859-2"
            Content-Transfer-Encoding: quoted-printable

            =BF
            """)
        message = MIMEText('\u017c\n', 'text', 'ISO-8859-2')
        self.assertEqual(str(message), expected)
728
Ezio Melottib3aedd42010-11-20 19:04:17 +0000729
# Test long header wrapping
731class TestLongHeaders(TestEmailBase):
R David Murray01581ee2011-04-18 10:04:34 -0400732
733 maxDiff = None
734
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000735 def test_split_long_continuation(self):
736 eq = self.ndiffAssertEqual
737 msg = email.message_from_string("""\
738Subject: bug demonstration
739\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
740\tmore text
741
742test
743""")
744 sfp = StringIO()
745 g = Generator(sfp)
746 g.flatten(msg)
747 eq(sfp.getvalue(), """\
748Subject: bug demonstration
749\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
750\tmore text
751
752test
753""")
754
755 def test_another_long_almost_unsplittable_header(self):
756 eq = self.ndiffAssertEqual
757 hstr = """\
758bug demonstration
759\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
760\tmore text"""
761 h = Header(hstr, continuation_ws='\t')
762 eq(h.encode(), """\
763bug demonstration
764\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
765\tmore text""")
766 h = Header(hstr.replace('\t', ' '))
767 eq(h.encode(), """\
768bug demonstration
769 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
770 more text""")
771
772 def test_long_nonstring(self):
773 eq = self.ndiffAssertEqual
774 g = Charset("iso-8859-1")
775 cz = Charset("iso-8859-2")
776 utf8 = Charset("utf-8")
777 g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
778 b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
779 b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
780 b'bef\xf6rdert. ')
781 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
782 b'd\xf9vtipu.. ')
783 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
784 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
785 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
786 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
787 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
788 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
789 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
790 '\u3044\u307e\u3059\u3002')
791 h = Header(g_head, g, header_name='Subject')
792 h.append(cz_head, cz)
793 h.append(utf8_head, utf8)
794 msg = Message()
795 msg['Subject'] = h
796 sfp = StringIO()
797 g = Generator(sfp)
798 g.flatten(msg)
799 eq(sfp.getvalue(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000800Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
801 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
802 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
803 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
804 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
805 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
806 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
807 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
808 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
809 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
810 =?utf-8?b?44CC?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000811
812""")
Guido van Rossum9604e662007-08-30 03:46:43 +0000813 eq(h.encode(maxlinelen=76), """\
814=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
815 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
816 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
817 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
818 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
819 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
820 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
821 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
822 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
823 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
824 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000825
826 def test_long_header_encode(self):
827 eq = self.ndiffAssertEqual
828 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
829 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
830 header_name='X-Foobar-Spoink-Defrobnit')
831 eq(h.encode(), '''\
832wasnipoop; giraffes="very-long-necked-animals";
833 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
834
835 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
836 eq = self.ndiffAssertEqual
837 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
838 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
839 header_name='X-Foobar-Spoink-Defrobnit',
840 continuation_ws='\t')
841 eq(h.encode(), '''\
842wasnipoop; giraffes="very-long-necked-animals";
843 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
844
845 def test_long_header_encode_with_tab_continuation(self):
846 eq = self.ndiffAssertEqual
847 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
848 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
849 header_name='X-Foobar-Spoink-Defrobnit',
850 continuation_ws='\t')
851 eq(h.encode(), '''\
852wasnipoop; giraffes="very-long-necked-animals";
853\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
854
R David Murray3a6152f2011-03-14 21:13:03 -0400855 def test_header_encode_with_different_output_charset(self):
856 h = Header('文', 'euc-jp')
857 self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=")
858
859 def test_long_header_encode_with_different_output_charset(self):
860 h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4'
861 b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4'
862 b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4'
863 b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp')
864 res = """\
865=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?=
866 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?="""
867 self.assertEqual(h.encode(), res)
868
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000869 def test_header_splitter(self):
870 eq = self.ndiffAssertEqual
871 msg = MIMEText('')
872 # It'd be great if we could use add_header() here, but that doesn't
873 # guarantee an order of the parameters.
874 msg['X-Foobar-Spoink-Defrobnit'] = (
875 'wasnipoop; giraffes="very-long-necked-animals"; '
876 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
877 sfp = StringIO()
878 g = Generator(sfp)
879 g.flatten(msg)
880 eq(sfp.getvalue(), '''\
881Content-Type: text/plain; charset="us-ascii"
882MIME-Version: 1.0
883Content-Transfer-Encoding: 7bit
884X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
885 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
886
887''')
888
def test_no_semis_header_splitter(self):
    # A References value with no semicolons can only be folded at the
    # whitespace between message ids.
    message = Message()
    message['From'] = 'test@dom.ain'
    message['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
    message.set_payload('Test')
    out = StringIO()
    Generator(out).flatten(message)
    self.ndiffAssertEqual(out.getvalue(), """\
From: test@dom.ain
References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>

Test""")
904
R David Murray7da4db12011-04-07 20:37:17 -0400905 def test_last_split_chunk_does_not_fit(self):
906 eq = self.ndiffAssertEqual
907 h = Header('Subject: the first part of this is short, but_the_second'
908 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
909 '_all_by_itself')
910 eq(h.encode(), """\
911Subject: the first part of this is short,
912 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
913
914 def test_splittable_leading_char_followed_by_overlong_unsplitable(self):
915 eq = self.ndiffAssertEqual
916 h = Header(', but_the_second'
917 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
918 '_all_by_itself')
919 eq(h.encode(), """\
920,
921 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
922
923 def test_multiple_splittable_leading_char_followed_by_overlong_unsplitable(self):
924 eq = self.ndiffAssertEqual
925 h = Header(', , but_the_second'
926 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
927 '_all_by_itself')
928 eq(h.encode(), """\
929, ,
930 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
931
932 def test_trailing_splitable_on_overlong_unsplitable(self):
933 eq = self.ndiffAssertEqual
934 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
935 'be_on_a_line_all_by_itself;')
936 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_should_"
937 "be_on_a_line_all_by_itself;")
938
939 def test_trailing_splitable_on_overlong_unsplitable_with_leading_splitable(self):
940 eq = self.ndiffAssertEqual
941 h = Header('; '
942 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
R David Murray01581ee2011-04-18 10:04:34 -0400943 'be_on_a_line_all_by_itself; ')
R David Murray7da4db12011-04-07 20:37:17 -0400944 eq(h.encode(), """\
945;
R David Murray01581ee2011-04-18 10:04:34 -0400946 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
R David Murray7da4db12011-04-07 20:37:17 -0400947
R David Murraye1292a22011-04-07 20:54:03 -0400948 def test_long_header_with_multiple_sequential_split_chars(self):
R David Murraye1292a22011-04-07 20:54:03 -0400949 eq = self.ndiffAssertEqual
950 h = Header('This is a long line that has two whitespaces in a row. '
951 'This used to cause truncation of the header when folded')
952 eq(h.encode(), """\
953This is a long line that has two whitespaces in a row. This used to cause
954 truncation of the header when folded""")
955
R David Murray01581ee2011-04-18 10:04:34 -0400956 def test_splitter_split_on_punctuation_only_if_fws(self):
957 eq = self.ndiffAssertEqual
958 h = Header('thisverylongheaderhas;semicolons;and,commas,but'
959 'they;arenotlegal;fold,points')
960 eq(h.encode(), "thisverylongheaderhas;semicolons;and,commas,butthey;"
961 "arenotlegal;fold,points")
962
963 def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self):
964 eq = self.ndiffAssertEqual
965 h = Header('this is a test where we need to have more than one line '
966 'before; our final line that is just too big to fit;; '
967 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
968 'be_on_a_line_all_by_itself;')
969 eq(h.encode(), """\
970this is a test where we need to have more than one line before;
971 our final line that is just too big to fit;;
972 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""")
973
974 def test_overlong_last_part_followed_by_split_point(self):
975 eq = self.ndiffAssertEqual
976 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
977 'be_on_a_line_all_by_itself ')
978 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_"
979 "should_be_on_a_line_all_by_itself ")
980
981 def test_multiline_with_overlong_parts_separated_by_two_split_points(self):
982 eq = self.ndiffAssertEqual
983 h = Header('this_is_a__test_where_we_need_to_have_more_than_one_line_'
984 'before_our_final_line_; ; '
985 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
986 'be_on_a_line_all_by_itself; ')
987 eq(h.encode(), """\
988this_is_a__test_where_we_need_to_have_more_than_one_line_before_our_final_line_;
989 ;
990 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
991
992 def test_multiline_with_overlong_last_part_followed_by_split_point(self):
993 eq = self.ndiffAssertEqual
994 h = Header('this is a test where we need to have more than one line '
995 'before our final line; ; '
996 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
997 'be_on_a_line_all_by_itself; ')
998 eq(h.encode(), """\
999this is a test where we need to have more than one line before our final line;
1000 ;
1001 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
1002
def test_long_header_with_whitespace_runs(self):
    # Runs of whitespace between message ids must be kept intact when the
    # header is folded, including the run at the very end of the value.
    # NOTE(review): the multi-space runs below are restored.  The text
    # this block was taken from had them collapsed to single spaces,
    # which contradicted its own expected "\x20\x20" tail and its 4/4/2
    # fold layout.  Confirm against the upstream file.
    message = Message()
    message['From'] = 'test@dom.ain'
    message['References'] = SPACE.join(['<foo@dom.ain>  '] * 10)
    message.set_payload('Test')
    out = StringIO()
    Generator(out).flatten(message)
    self.ndiffAssertEqual(out.getvalue(), """\
From: test@dom.ain
References: <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>
   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>
   <foo@dom.ain>   <foo@dom.ain>\x20\x20

Test""")
1019
def test_long_run_with_semi_header_splitter(self):
    # A long run of message ids followed by "; abc": the expected folds
    # fall on the spaces between ids, and "; abc" stays attached to the
    # last id.
    message = Message()
    message['From'] = 'test@dom.ain'
    message['References'] = SPACE.join(['<foo@dom.ain>'] * 10) + '; abc'
    message.set_payload('Test')
    out = StringIO()
    Generator(out).flatten(message)
    self.ndiffAssertEqual(out.getvalue(), """\
From: test@dom.ain
References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
 <foo@dom.ain>; abc

Test""")
1036
1037 def test_splitter_split_on_punctuation_only_if_fws(self):
1038 eq = self.ndiffAssertEqual
1039 msg = Message()
1040 msg['From'] = 'test@dom.ain'
1041 msg['References'] = ('thisverylongheaderhas;semicolons;and,commas,but'
1042 'they;arenotlegal;fold,points')
1043 msg.set_payload('Test')
1044 sfp = StringIO()
1045 g = Generator(sfp)
1046 g.flatten(msg)
1047 # XXX the space after the header should not be there.
1048 eq(sfp.getvalue(), """\
1049From: test@dom.ain
1050References:\x20
1051 thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points
1052
1053Test""")
1054
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001055 def test_no_split_long_header(self):
1056 eq = self.ndiffAssertEqual
1057 hstr = 'References: ' + 'x' * 80
Guido van Rossum9604e662007-08-30 03:46:43 +00001058 h = Header(hstr)
1059 # These come on two lines because Headers are really field value
1060 # classes and don't really know about their field names.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001061 eq(h.encode(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +00001062References:
1063 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
1064 h = Header('x' * 80)
1065 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001066
1067 def test_splitting_multiple_long_lines(self):
1068 eq = self.ndiffAssertEqual
1069 hstr = """\
1070from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1071\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1072\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1073"""
1074 h = Header(hstr, continuation_ws='\t')
1075 eq(h.encode(), """\
1076from babylon.socal-raves.org (localhost [127.0.0.1]);
1077 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1078 for <mailman-admin@babylon.socal-raves.org>;
1079 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1080\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1081 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1082 for <mailman-admin@babylon.socal-raves.org>;
1083 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1084\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1085 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1086 for <mailman-admin@babylon.socal-raves.org>;
1087 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")
1088
1089 def test_splitting_first_line_only_is_long(self):
1090 eq = self.ndiffAssertEqual
1091 hstr = """\
1092from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
1093\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1094\tid 17k4h5-00034i-00
1095\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
1096 h = Header(hstr, maxlinelen=78, header_name='Received',
1097 continuation_ws='\t')
1098 eq(h.encode(), """\
1099from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
1100 helo=cthulhu.gerg.ca)
1101\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1102\tid 17k4h5-00034i-00
1103\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")
1104
1105 def test_long_8bit_header(self):
1106 eq = self.ndiffAssertEqual
1107 msg = Message()
1108 h = Header('Britische Regierung gibt', 'iso-8859-1',
1109 header_name='Subject')
1110 h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
Guido van Rossum9604e662007-08-30 03:46:43 +00001111 eq(h.encode(maxlinelen=76), """\
1112=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1113 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001114 msg['Subject'] = h
Guido van Rossum9604e662007-08-30 03:46:43 +00001115 eq(msg.as_string(maxheaderlen=76), """\
1116Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1117 =?iso-8859-1?q?hore-Windkraftprojekte?=
1118
1119""")
1120 eq(msg.as_string(maxheaderlen=0), """\
1121Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001122
1123""")
1124
1125 def test_long_8bit_header_no_charset(self):
1126 eq = self.ndiffAssertEqual
1127 msg = Message()
Barry Warsaw8c571042007-08-30 19:17:18 +00001128 header_string = ('Britische Regierung gibt gr\xfcnes Licht '
1129 'f\xfcr Offshore-Windkraftprojekte '
1130 '<a-very-long-address@example.com>')
1131 msg['Reply-To'] = header_string
R David Murray7441a7a2012-03-14 02:59:51 -04001132 eq(msg.as_string(maxheaderlen=78), """\
1133Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1134 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
1135
1136""")
Barry Warsaw8c571042007-08-30 19:17:18 +00001137 msg = Message()
R David Murray7441a7a2012-03-14 02:59:51 -04001138 msg['Reply-To'] = Header(header_string,
Barry Warsaw8c571042007-08-30 19:17:18 +00001139 header_name='Reply-To')
1140 eq(msg.as_string(maxheaderlen=78), """\
1141Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1142 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001143
1144""")
1145
1146 def test_long_to_header(self):
1147 eq = self.ndiffAssertEqual
1148 to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
R David Murray01581ee2011-04-18 10:04:34 -04001149 '<someone@eecs.umich.edu>, '
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001150 '"Someone Test #B" <someone@umich.edu>, '
1151 '"Someone Test #C" <someone@eecs.umich.edu>, '
1152 '"Someone Test #D" <someone@eecs.umich.edu>')
1153 msg = Message()
1154 msg['To'] = to
1155 eq(msg.as_string(maxheaderlen=78), '''\
Guido van Rossum9604e662007-08-30 03:46:43 +00001156To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
Barry Warsaw70d61ce2009-03-30 23:12:30 +00001157 "Someone Test #B" <someone@umich.edu>,
Guido van Rossum9604e662007-08-30 03:46:43 +00001158 "Someone Test #C" <someone@eecs.umich.edu>,
1159 "Someone Test #D" <someone@eecs.umich.edu>
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001160
1161''')
1162
1163 def test_long_line_after_append(self):
1164 eq = self.ndiffAssertEqual
1165 s = 'This is an example of string which has almost the limit of header length.'
1166 h = Header(s)
1167 h.append('Add another line.')
Guido van Rossum9604e662007-08-30 03:46:43 +00001168 eq(h.encode(maxlinelen=76), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001169This is an example of string which has almost the limit of header length.
1170 Add another line.""")
1171
1172 def test_shorter_line_with_append(self):
1173 eq = self.ndiffAssertEqual
1174 s = 'This is a shorter line.'
1175 h = Header(s)
1176 h.append('Add another sentence. (Surprise?)')
1177 eq(h.encode(),
1178 'This is a shorter line. Add another sentence. (Surprise?)')
1179
1180 def test_long_field_name(self):
1181 eq = self.ndiffAssertEqual
1182 fn = 'X-Very-Very-Very-Long-Header-Name'
Guido van Rossum9604e662007-08-30 03:46:43 +00001183 gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
1184 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
1185 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
1186 'bef\xf6rdert. ')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001187 h = Header(gs, 'iso-8859-1', header_name=fn)
1188 # BAW: this seems broken because the first line is too long
Guido van Rossum9604e662007-08-30 03:46:43 +00001189 eq(h.encode(maxlinelen=76), """\
1190=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
1191 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
1192 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
1193 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001194
1195 def test_long_received_header(self):
1196 h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
1197 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
1198 'Wed, 05 Mar 2003 18:10:18 -0700')
1199 msg = Message()
1200 msg['Received-1'] = Header(h, continuation_ws='\t')
1201 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +00001202 # This should be splitting on spaces not semicolons.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001203 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001204Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1205 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
Barry Warsawbef9d212007-08-31 10:55:37 +00001206 Wed, 05 Mar 2003 18:10:18 -0700
R David Murray01581ee2011-04-18 10:04:34 -04001207Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1208 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
Barry Warsawbef9d212007-08-31 10:55:37 +00001209 Wed, 05 Mar 2003 18:10:18 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001210
1211""")
1212
1213 def test_string_headerinst_eq(self):
1214 h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
1215 'tu-muenchen.de> (David Bremner\'s message of '
1216 '"Thu, 6 Mar 2003 13:58:21 +0100")')
1217 msg = Message()
1218 msg['Received-1'] = Header(h, header_name='Received-1',
1219 continuation_ws='\t')
1220 msg['Received-2'] = h
R David Murray01581ee2011-04-18 10:04:34 -04001221 # XXX The space after the ':' should not be there.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001222 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001223Received-1:\x20
1224 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1225 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
1226Received-2:\x20
1227 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1228 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001229
1230""")
1231
1232 def test_long_unbreakable_lines_with_continuation(self):
1233 eq = self.ndiffAssertEqual
1234 msg = Message()
1235 t = """\
1236iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1237 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
1238 msg['Face-1'] = t
1239 msg['Face-2'] = Header(t, header_name='Face-2')
R David Murray01581ee2011-04-18 10:04:34 -04001240 msg['Face-3'] = ' ' + t
Barry Warsawbef9d212007-08-31 10:55:37 +00001241 # XXX This splitting is all wrong. It the first value line should be
R David Murray01581ee2011-04-18 10:04:34 -04001242 # snug against the field name or the space after the header not there.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001243 eq(msg.as_string(maxheaderlen=78), """\
Barry Warsawc5a6a302007-08-31 11:19:21 +00001244Face-1:\x20
Barry Warsaw70d61ce2009-03-30 23:12:30 +00001245 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001246 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Barry Warsawc5a6a302007-08-31 11:19:21 +00001247Face-2:\x20
Barry Warsawbef9d212007-08-31 10:55:37 +00001248 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001249 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
R David Murray01581ee2011-04-18 10:04:34 -04001250Face-3:\x20
1251 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1252 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001253
1254""")
1255
1256 def test_another_long_multiline_header(self):
1257 eq = self.ndiffAssertEqual
1258 m = ('Received: from siimage.com '
1259 '([172.25.1.3]) by zima.siliconimage.com with '
Guido van Rossum9604e662007-08-30 03:46:43 +00001260 'Microsoft SMTPSVC(5.0.2195.4905); '
1261 'Wed, 16 Oct 2002 07:41:11 -0700')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001262 msg = email.message_from_string(m)
1263 eq(msg.as_string(maxheaderlen=78), '''\
R David Murray01581ee2011-04-18 10:04:34 -04001264Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with
1265 Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 2002 07:41:11 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001266
1267''')
1268
1269 def test_long_lines_with_different_header(self):
1270 eq = self.ndiffAssertEqual
1271 h = ('List-Unsubscribe: '
1272 '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
1273 ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
1274 '?subject=unsubscribe>')
1275 msg = Message()
1276 msg['List'] = h
1277 msg['List'] = Header(h, header_name='List')
1278 eq(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001279List: List-Unsubscribe:
1280 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +00001281 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
R David Murray01581ee2011-04-18 10:04:34 -04001282List: List-Unsubscribe:
1283 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +00001284 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001285
1286""")
1287
R. David Murray6f0022d2011-01-07 21:57:25 +00001288 def test_long_rfc2047_header_with_embedded_fws(self):
1289 h = Header(textwrap.dedent("""\
1290 We're going to pretend this header is in a non-ascii character set
1291 \tto see if line wrapping with encoded words and embedded
1292 folding white space works"""),
1293 charset='utf-8',
1294 header_name='Test')
1295 self.assertEqual(h.encode()+'\n', textwrap.dedent("""\
1296 =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
1297 =?utf-8?q?cter_set?=
1298 =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
1299 =?utf-8?q?_folding_white_space_works?=""")+'\n')
1300
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001301
Ezio Melottib3aedd42010-11-20 19:04:17 +00001302
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001303# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    # Body lines that start with "From " are expected to be escaped as
    # ">From " only when the generator is created with mangle_from_=True.

    def setUp(self):
        # A minimal message whose payload begins with a "From " line.
        message = Message()
        message['From'] = 'aaa@bbb.org'
        message.set_payload("""\
From the desk of A.A.A.:
Blah blah blah
""")
        self.msg = message

    def test_mangled_from(self):
        out = StringIO()
        Generator(out, mangle_from_=True).flatten(self.msg)
        self.assertEqual(out.getvalue(), """\
From: aaa@bbb.org

>From the desk of A.A.A.:
Blah blah blah
""")

    def test_dont_mangle_from(self):
        out = StringIO()
        Generator(out, mangle_from_=False).flatten(self.msg)
        self.assertEqual(out.getvalue(), """\
From: aaa@bbb.org

From the desk of A.A.A.:
Blah blah blah
""")

    def test_mangle_from_in_preamble_and_epilog(self):
        # Both the preamble and the epilogue of the multipart hold a
        # "From " line, so exactly two mangled lines are expected.
        out = StringIO()
        generator = Generator(out, mangle_from_=True)
        multipart = email.message_from_string(textwrap.dedent("""\
            From: foo@bar.com
            Mime-Version: 1.0
            Content-Type: multipart/mixed; boundary=XXX

            From somewhere unknown

            --XXX
            Content-Type: text/plain

            foo

            --XXX--

            From somewhere unknowable
            """))
        generator.flatten(multipart)
        mangled = [line for line in out.getvalue().split('\n')
                   if line.startswith('>From ')]
        self.assertEqual(len(mangled), 2)

    def test_mangled_from_with_bad_bytes(self):
        # The 8bit body holds non-ASCII bytes after "From "; it must
        # still be mangled and the bytes passed through unchanged.
        headers = textwrap.dedent("""\
            Content-Type: text/plain; charset="utf-8"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 8bit
            From: aaa@bbb.org

            """).encode('utf-8')
        parsed = email.message_from_bytes(headers + b'From R\xc3\xb6lli\n')
        out = BytesIO()
        BytesGenerator(out, mangle_from_=True).flatten(parsed)
        self.assertEqual(out.getvalue(), headers + b'>From R\xc3\xb6lli\n')
1371
Ezio Melottib3aedd42010-11-20 19:04:17 +00001372
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001373# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    # Exercises MIMEAudio built from the bytes of the 'audiotest.au'
    # fixture.

    def setUp(self):
        with openfile('audiotest.au', 'rb') as fp:
            self._audiodata = fp.read()
        self._au = MIMEAudio(self._audiodata)

    def test_guess_minor_type(self):
        self.assertEqual(self._au.get_content_type(), 'audio/basic')

    def test_encoding(self):
        # The payload is text; decoding it as base64 must give back the
        # original audio bytes.
        payload = self._au.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._audiodata)

    def test_checkSetMinor(self):
        au = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(au.get_content_type(), 'audio/fish')

    def test_add_header(self):
        eq = self.assertEqual
        # assertIs reports both operands on failure, unlike
        # assertTrue(a is b).
        same = self.assertIs
        self._au.add_header('Content-Disposition', 'attachment',
                            filename='audiotest.au')
        eq(self._au['content-disposition'],
           'attachment; filename="audiotest.au"')
        eq(self._au.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'audiotest.au')])
        eq(self._au.get_param('filename', header='content-disposition'),
           'audiotest.au')
        # A fresh list is a sentinel that no lookup can return by accident.
        missing = []
        eq(self._au.get_param('attachment', header='content-disposition'), '')
        same(self._au.get_param('foo', failobj=missing,
                                header='content-disposition'), missing)
        # Try some missing stuff
        same(self._au.get_param('foobar', missing), missing)
        same(self._au.get_param('attachment', missing,
                                header='foobar'), missing)
1411
1412
Ezio Melottib3aedd42010-11-20 19:04:17 +00001413
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001414# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    # Exercises MIMEImage built from the bytes of the 'PyBanner048.gif'
    # fixture.

    def setUp(self):
        with openfile('PyBanner048.gif', 'rb') as fp:
            self._imgdata = fp.read()
        self._im = MIMEImage(self._imgdata)

    def test_guess_minor_type(self):
        self.assertEqual(self._im.get_content_type(), 'image/gif')

    def test_encoding(self):
        # The payload is text; decoding it as base64 must give back the
        # original image bytes.
        payload = self._im.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._imgdata)

    def test_checkSetMinor(self):
        im = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(im.get_content_type(), 'image/fish')

    def test_add_header(self):
        eq = self.assertEqual
        # assertIs reports both operands on failure, unlike
        # assertTrue(a is b).
        same = self.assertIs
        self._im.add_header('Content-Disposition', 'attachment',
                            filename='dingusfish.gif')
        eq(self._im['content-disposition'],
           'attachment; filename="dingusfish.gif"')
        eq(self._im.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'dingusfish.gif')])
        eq(self._im.get_param('filename', header='content-disposition'),
           'dingusfish.gif')
        # A fresh list is a sentinel that no lookup can return by accident.
        missing = []
        eq(self._im.get_param('attachment', header='content-disposition'), '')
        same(self._im.get_param('foo', failobj=missing,
                                header='content-disposition'), missing)
        # Try some missing stuff
        same(self._im.get_param('foobar', missing), missing)
        same(self._im.get_param('attachment', missing,
                                header='foobar'), missing)
1452
1453
Ezio Melottib3aedd42010-11-20 19:04:17 +00001454
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001455# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    # Every test uses six bytes that are invalid as ASCII, so each encoder
    # has to deal with genuinely binary data.

    @staticmethod
    def _roundtrip(msg):
        # Flatten msg to its wire form and parse those bytes back.
        s = BytesIO()
        g = BytesGenerator(s)
        g.flatten(msg)
        wireform = s.getvalue()
        return email.message_from_bytes(wireform)

    def test_headers(self):
        eq = self.assertEqual
        msg = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        eq(msg.get_content_type(), 'application/octet-stream')
        eq(msg['content-transfer-encoding'], 'base64')

    def test_body(self):
        eq = self.assertEqual
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata)
        # whitespace in the cte encoded block is RFC-irrelevant.
        eq(msg.get_payload().strip(), '+vv8/f7/')
        eq(msg.get_payload(decode=True), bytesdata)

    def test_binary_body_with_encode_7or8bit(self):
        # Issue 17171.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_7or8bit)
        # Treated as a string, this will be invalid code points.
        self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        self.assertEqual(msg['Content-Transfer-Encoding'], '8bit')
        msg2 = self._roundtrip(msg)
        # Flattening must leave the original message untouched...
        self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
        # ...and the reparsed message must look the same as the original.
        self.assertEqual(msg2.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
        self.assertEqual(msg2['Content-Transfer-Encoding'], '8bit')

    def test_binary_body_with_encode_noop(self):
        # Issue 16564: This does not produce an RFC valid message, since to be
        # valid it should have a CTE of binary.  But the below works in
        # Python2, and is documented as working this way.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_noop)
        # Treated as a string, this will be invalid code points.
        self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        msg2 = self._roundtrip(msg)
        # Flattening must leave the original message untouched...
        self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
        # ...and the reparsed message must look the same as the original.
        self.assertEqual(msg2.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)

    def test_binary_body_with_encode_quopri(self):
        # Issue 14360.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff '
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_quopri)
        self.assertEqual(msg.get_payload(), '=FA=FB=FC=FD=FE=FF=20')
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        self.assertEqual(msg['Content-Transfer-Encoding'], 'quoted-printable')
        msg2 = self._roundtrip(msg)
        # Flattening must leave the original message untouched...
        self.assertEqual(msg.get_payload(), '=FA=FB=FC=FD=FE=FF=20')
        # ...and the reparsed message must look the same as the original.
        self.assertEqual(msg2.get_payload(), '=FA=FB=FC=FD=FE=FF=20')
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
        self.assertEqual(msg2['Content-Transfer-Encoding'], 'quoted-printable')

    def test_binary_body_with_encode_base64(self):
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_base64)
        self.assertEqual(msg.get_payload(), '+vv8/f7/\n')
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        msg2 = self._roundtrip(msg)
        # Flattening must leave the original message untouched...
        self.assertEqual(msg.get_payload(), '+vv8/f7/\n')
        # ...and the reparsed message must look the same as the original.
        self.assertEqual(msg2.get_payload(), '+vv8/f7/\n')
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
1533
Ezio Melottib3aedd42010-11-20 19:04:17 +00001534
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001535# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    def setUp(self):
        self._msg = MIMEText('hello there')

    def test_types(self):
        eq = self.assertEqual
        eq(self._msg.get_content_type(), 'text/plain')
        eq(self._msg.get_param('charset'), 'us-ascii')
        # A fresh list is a sentinel that no header value can be identical to.
        missing = []
        self.assertIs(self._msg.get_param('foobar', missing), missing)
        self.assertIs(self._msg.get_param('charset', missing,
                                          header='foobar'), missing)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        # NOTE(review): same body as test_charset; kept so the set of test
        # names does not change.
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there')
        eq(msg.get_charset(), 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        self.assertIn('hello there', msg.as_string())

    def test_utf8_input(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        eq = self.assertEqual
        msg = MIMEText(teststr, _charset='utf-8')
        eq(msg.get_charset().output_charset, 'utf-8')
        eq(msg['content-type'], 'text/plain; charset="utf-8"')
        eq(msg.get_payload(decode=True), teststr.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
                   "will fix in email6")
    def test_utf8_input_no_charset(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
1586
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001587
Ezio Melottib3aedd42010-11-20 19:04:17 +00001588
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001589# Test complicated multipart/* messages
class TestMultipart(TestEmailBase):
    def setUp(self):
        # Build a multipart/mixed container holding a text intro and a GIF.
        with openfile('PyBanner048.gif', 'rb') as fp:
            data = fp.read()
        container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
        image = MIMEImage(data, name='dingusfish.gif')
        image.add_header('content-disposition', 'attachment',
                         filename='dingusfish.gif')
        intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
        container.attach(intro)
        container.attach(image)
        container['From'] = 'Barry <barry@digicool.com>'
        container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
        container['Subject'] = 'Here is your dingus fish'

        # Let formatdate() render the local-time date and its +/-HHMM offset.
        # Deriving the offset by hand as "seconds / 36" is only right for
        # whole-hour zones and produces a doubled sign east of UTC;
        # formatdate() is also independent of the current locale.
        now = 987809702.54848599
        container['Date'] = utils.formatdate(now, localtime=True)
        self._msg = container
        self._im = image
        self._txt = intro

    def test_hierarchy(self):
        # convenience
        eq = self.assertEqual
        raises = self.assertRaises
        # tests
        m = self._msg
        self.assertTrue(m.is_multipart())
        eq(m.get_content_type(), 'multipart/mixed')
        eq(len(m.get_payload()), 2)
        raises(IndexError, m.get_payload, 2)
        m0 = m.get_payload(0)
        m1 = m.get_payload(1)
        self.assertIs(m0, self._txt)
        self.assertIs(m1, self._im)
        eq(m.get_payload(), [m0, m1])
        self.assertFalse(m0.is_multipart())
        self.assertFalse(m1.is_multipart())

    def test_empty_multipart_idempotent(self):
        text = """\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY


--BOUNDARY--
"""
        msg = Parser().parsestr(text)
        self.ndiffAssertEqual(text, msg.as_string())

    def test_no_parts_in_a_multipart_with_none_epilogue(self):
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY

--BOUNDARY--''')

    def test_no_parts_in_a_multipart_with_empty_epilogue(self):
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = ''
        outer.epilogue = ''
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY

--BOUNDARY--
''')

    def test_one_part_in_a_multipart(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.set_boundary('BOUNDARY')
        msg = MIMEText('hello world')
        outer.attach(msg)
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_preamble(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')


    def test_seq_parts_in_a_multipart_with_none_preamble(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')


    def test_seq_parts_in_a_multipart_with_none_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')


    def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')


    def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = '\n'
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--

''')

    def test_message_external_body(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_36.txt')
        eq(len(msg.get_payload()), 2)
        msg1 = msg.get_payload(1)
        eq(msg1.get_content_type(), 'multipart/alternative')
        eq(len(msg1.get_payload()), 2)
        for subpart in msg1.get_payload():
            eq(subpart.get_content_type(), 'message/external-body')
            eq(len(subpart.get_payload()), 1)
            subsubpart = subpart.get_payload(0)
            eq(subsubpart.get_content_type(), 'text/plain')

    def test_double_boundary(self):
        # msg_37.txt is a multipart that contains two dash-boundary's in a
        # row.  Our interpretation of RFC 2046 calls for ignoring the second
        # and subsequent boundaries.
        msg = self._msgobj('msg_37.txt')
        self.assertEqual(len(msg.get_payload()), 3)

    def test_nested_inner_contains_outer_boundary(self):
        eq = self.ndiffAssertEqual
        # msg_38.txt has an inner part that contains outer boundaries.  My
        # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
        # these are illegal and should be interpreted as unterminated inner
        # parts.
        msg = self._msgobj('msg_38.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
            text/plain
        text/plain
    text/plain
    text/plain
""")

    def test_nested_with_same_boundary(self):
        eq = self.ndiffAssertEqual
        # msg 39.txt is similarly evil in that it's got inner parts that use
        # the same boundary as outer parts.  Again, I believe the way this is
        # parsed is closest to the spirit of RFC 2046
        msg = self._msgobj('msg_39.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
        application/octet-stream
        application/octet-stream
    text/plain
""")

    def test_boundary_in_non_multipart(self):
        msg = self._msgobj('msg_40.txt')
        self.assertEqual(msg.as_string(), '''\
MIME-Version: 1.0
Content-Type: text/html; boundary="--961284236552522269"

----961284236552522269
Content-Type: text/html;
Content-Transfer-Encoding: 7Bit

<html></html>

----961284236552522269--
''')

    def test_boundary_with_leading_space(self):
        eq = self.assertEqual
        msg = email.message_from_string('''\
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary=" XXXX"

-- XXXX
Content-Type: text/plain


-- XXXX
Content-Type: text/plain

-- XXXX--
''')
        self.assertTrue(msg.is_multipart())
        eq(msg.get_boundary(), ' XXXX')
        eq(len(msg.get_payload()), 2)

    def test_boundary_without_trailing_newline(self):
        m = Parser().parsestr("""\
Content-Type: multipart/mixed; boundary="===============0012394164=="
MIME-Version: 1.0

--===============0012394164==
Content-Type: image/file1.jpg
MIME-Version: 1.0
Content-Transfer-Encoding: base64

YXNkZg==
--===============0012394164==--""")
        self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001961
1962
Ezio Melottib3aedd42010-11-20 19:04:17 +00001963
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001964# Test some badly formatted messages
class TestNonConformant(TestEmailBase):

    def test_parse_missing_minor_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_14.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')

    # test_defect_handling
    def test_same_boundary_inner_outer(self):
        msg = self._msgobj('msg_15.txt')
        # XXX We can probably eventually do better
        inner = msg.get_payload(0)
        self.assertTrue(hasattr(inner, 'defects'))
        self.assertEqual(len(inner.defects), 1)
        self.assertIsInstance(inner.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    # test_defect_handling
    def test_multipart_no_boundary(self):
        msg = self._msgobj('msg_25.txt')
        self.assertIsInstance(msg.get_payload(), str)
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    # Template for the CTE tests below: the "{}" slot directly follows the
    # Content-Type header so a test can splice in an extra header line.
    multipart_msg = textwrap.dedent("""\
        Date: Wed, 14 Nov 2007 12:56:23 GMT
        From: foo@bar.invalid
        To: foo@bar.invalid
        Subject: Content-Transfer-Encoding: base64 and multipart
        MIME-Version: 1.0
        Content-Type: multipart/mixed;
            boundary="===============3344438784458119861=="{}

        --===============3344438784458119861==
        Content-Type: text/plain

        Test message

        --===============3344438784458119861==
        Content-Type: application/octet-stream
        Content-Transfer-Encoding: base64

        YWJj

        --===============3344438784458119861==--
        """)

    # test_defect_handling
    def test_multipart_invalid_cte(self):
        msg = self._str_msg(
            self.multipart_msg.format("\nContent-Transfer-Encoding: base64"))
        self.assertEqual(len(msg.defects), 1)
        self.assertIsInstance(msg.defects[0],
            errors.InvalidMultipartContentTransferEncodingDefect)

    # test_defect_handling
    def test_multipart_no_cte_no_defect(self):
        msg = self._str_msg(self.multipart_msg.format(''))
        self.assertEqual(len(msg.defects), 0)

    # test_defect_handling
    def test_multipart_valid_cte_no_defect(self):
        for cte in ('7bit', '8bit', 'BINary'):
            msg = self._str_msg(
                self.multipart_msg.format(
                    "\nContent-Transfer-Encoding: {}".format(cte)))
            self.assertEqual(len(msg.defects), 0)

    # test_headerregistry.TestContentTypeHeader invalid_1 and invalid_2.
    def test_invalid_content_type(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        msg = Message()
        # RFC 2045, $5.2 says invalid yields text/plain
        msg['Content-Type'] = 'text'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Clear the old value and try something /really/ invalid
        del msg['content-type']
        msg['Content-Type'] = 'foo'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Still, make sure that the message is idempotently generated
        s = StringIO()
        g = Generator(s)
        g.flatten(msg)
        neq(s.getvalue(), 'Content-Type: foo\n\n')

    def test_no_start_boundary(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_31.txt')
        eq(msg.get_payload(), """\
--BOUNDARY
Content-Type: text/plain

message 1

--BOUNDARY
Content-Type: text/plain

message 2

--BOUNDARY--
""")

    def test_no_separating_blank_line(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_35.txt')
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: here's something interesting

counter to RFC 2822, there's no separating newline here
""")

    # test_defect_handling
    def test_lying_multipart(self):
        msg = self._msgobj('msg_41.txt')
        self.assertTrue(hasattr(msg, 'defects'))
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    # test_defect_handling
    def test_missing_start_boundary(self):
        outer = self._msgobj('msg_42.txt')
        # The message structure is:
        #
        # multipart/mixed
        #    text/plain
        #    message/rfc822
        #        multipart/mixed [*]
        #
        # [*] This message is missing its start boundary
        bad = outer.get_payload(1).get_payload(0)
        self.assertEqual(len(bad.defects), 1)
        self.assertIsInstance(bad.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    # test_defect_handling
    def test_first_line_is_continuation_header(self):
        eq = self.assertEqual
        m = ' Line 1\nSubject: test\n\nbody'
        msg = email.message_from_string(m)
        eq(msg.keys(), ['Subject'])
        eq(msg.get_payload(), 'body')
        eq(len(msg.defects), 1)
        self.assertDefectsEqual(msg.defects,
                                 [errors.FirstHeaderLineIsContinuationDefect])
        eq(msg.defects[0].line, ' Line 1\n')

    # test_defect_handling
    def test_missing_header_body_separator(self):
        # Our heuristic if we see a line that doesn't look like a header (no
        # leading whitespace but no ':') is to assume that the blank line that
        # separates the header from the body is missing, and to stop parsing
        # headers and start parsing the body.
        msg = self._str_msg('Subject: test\nnot a header\nTo: abc\n\nb\n')
        self.assertEqual(msg.keys(), ['Subject'])
        self.assertEqual(msg.get_payload(), 'not a header\nTo: abc\n\nb\n')
        self.assertDefectsEqual(msg.defects,
                                [errors.MissingHeaderBodySeparatorDefect])
2140
Ezio Melottib3aedd42010-11-20 19:04:17 +00002141
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002142# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    def test_rfc2047_multiline(self):
        eq = self.assertEqual
        s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
        dh = decode_header(s)
        eq(dh, [
            (b'Re: ', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            (b' baz foo bar ', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
        header = make_header(dh)
        eq(str(header),
           'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""")

    def test_whitespace_keeper_unicode(self):
        eq = self.assertEqual
        s = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        dh = decode_header(s)
        eq(dh, [(b'Andr\xe9', 'iso-8859-1'),
                (b' Pirard <pirard@dom.ain>', None)])
        header = str(make_header(dh))
        eq(header, 'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_keeper_unicode_2(self):
        eq = self.assertEqual
        s = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?='
        dh = decode_header(s)
        eq(dh, [(b'The ', None), (b'quick brown fox', 'iso-8859-1'),
                (b' jumped over the ', None), (b'lazy dog', 'iso-8859-1')])
        hu = str(make_header(dh))
        eq(hu, 'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        dh = decode_header(s)
        self.assertEqual(dh, [(b'Sm', None), (b'\xf6', 'iso-8859-1'),
                              (b'rg', None), (b'\xe5', 'iso-8859-1'),
                              (b'sbord', None)])

    def test_rfc2047_with_whitespace(self):
        s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        dh = decode_header(s)
        self.assertEqual(dh, [(b'Sm ', None), (b'\xf6', 'iso-8859-1'),
                              (b' rg ', None), (b'\xe5', 'iso-8859-1'),
                              (b' sbord', None)])

    def test_rfc2047_B_bad_padding(self):
        s = '=?iso-8859-1?B?%s?='
        data = [                                # only test complete bytes
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi')
          ]
        for q, a in data:
            dh = decode_header(s % q)
            self.assertEqual(dh, [(a, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004.
        s = '=?iso-8659-1?Q?andr=e9=zz?='
        self.assertEqual(decode_header(s),
                         [(b'andr\xe9=zz', 'iso-8659-1')])

    def test_rfc2047_rfc2047_1(self):
        # 1st testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_2(self):
        # 2nd testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?= b)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a', 'iso-8859-1'), (b' b)', None)])

    def test_rfc2047_rfc2047_3(self):
        # 3rd testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_4(self):
        # 4th testcase at end of rfc2047: unlike the 3rd, the two encoded
        # words are separated by more than one space.
        s = '(=?ISO-8859-1?Q?a?=  =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_5a(self):
        # 5th testcase at end of rfc2047 newline is \r\n
        s = '(=?ISO-8859-1?Q?a?=\r\n =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_5b(self):
        # 5th testcase at end of rfc2047 newline is \n
        s = '(=?ISO-8859-1?Q?a?=\n =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_6(self):
        # 6th testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a_b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a b', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_7(self):
        # 7th testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a', 'iso-8859-1'), (b' b', 'iso-8859-2'),
             (b')', None)])
        self.assertEqual(make_header(decode_header(s)).encode(), s.lower())
        self.assertEqual(str(make_header(decode_header(s))), '(a b)')

    def test_multiline_header(self):
        s = '=?windows-1252?q?=22M=FCller_T=22?=\r\n <T.Mueller@xxx.com>'
        self.assertEqual(decode_header(s),
            [(b'"M\xfcller T"', 'windows-1252'),
             (b'<T.Mueller@xxx.com>', None)])
        self.assertEqual(make_header(decode_header(s)).encode(),
                         ''.join(s.splitlines()))
        self.assertEqual(str(make_header(decode_header(s))),
                         '"Müller T" <T.Mueller@xxx.com>')
2269
Ezio Melottib3aedd42010-11-20 19:04:17 +00002270
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002271# Test the MIMEMessage class
class TestMIMEMessage(TestEmailBase):
    # Covers MIMEMessage construction, parsing of message/rfc822 and
    # message/delivery-status parts, flattening of multiparts with a
    # preamble/epilogue, and the default content type of digest subparts.

    def setUp(self):
        # Keep the raw text of msg_11.txt on the instance.
        with openfile('msg_11.txt') as fp:
            self._text = fp.read()

    # -- private helpers ---------------------------------------------------

    def _assert_single_submessage(self, container):
        # Assert that *container*'s payload is a one-element list holding a
        # Message, and return that Message.
        payload = container.get_payload()
        self.assertIsInstance(payload, list)
        self.assertEqual(len(payload), 1)
        self.assertIsInstance(payload[0], Message)
        return payload[0]

    def _two_part_message(self, epilogue):
        # Build a multipart/mixed message with a fixed preamble, the given
        # epilogue, and two text/plain parts ('One' and 'Two').
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = epilogue
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        for body in ('One', 'Two'):
            msg.attach(MIMEText(body))
        return msg

    def _check_digest_default_types(self, filename):
        # Parse *filename* and check that its first two subparts are
        # message/rfc822 containers each wrapping a text/plain message.
        eq = self.assertEqual
        with openfile(filename) as fp:
            msg = email.message_from_file(fp)
        containers = [msg.get_payload(0), msg.get_payload(1)]
        for container in containers:
            eq(container.get_default_type(), 'message/rfc822')
            eq(container.get_content_type(), 'message/rfc822')
        for container in containers:
            inner = container.get_payload(0)
            eq(inner.get_default_type(), 'text/plain')
            eq(inner.get_content_type(), 'text/plain')

    # -- tests -------------------------------------------------------------

    def test_type_error(self):
        # MIMEMessage only accepts Message instances.
        self.assertRaises(TypeError, MIMEMessage, 'a plain string')

    def test_valid_argument(self):
        subject = 'A sub-message'
        m = Message()
        m['Subject'] = subject
        r = MIMEMessage(m)
        self.assertEqual(r.get_content_type(), 'message/rfc822')
        subpart = self._assert_single_submessage(r)
        # The wrapped message is stored by reference, not copied.
        self.assertIs(subpart, m)
        self.assertEqual(subpart['subject'], subject)

    def test_bad_multipart(self):
        msg1 = Message()
        msg1['Subject'] = 'subpart 1'
        msg2 = Message()
        msg2['Subject'] = 'subpart 2'
        r = MIMEMessage(msg1)
        # A message/rfc822 container cannot take a second attachment.
        self.assertRaises(errors.MultipartConversionError, r.attach, msg2)

    def test_generate(self):
        # First craft the message to be encapsulated
        m = Message()
        m['Subject'] = 'An enclosed message'
        m.set_payload('Here is the body of the message.\n')
        r = MIMEMessage(m)
        r['Subject'] = 'The enclosing message'
        s = StringIO()
        g = Generator(s)
        g.flatten(r)
        self.assertEqual(s.getvalue(), """\
Content-Type: message/rfc822
MIME-Version: 1.0
Subject: The enclosing message

Subject: An enclosed message

Here is the body of the message.
""")

    def test_parse_message_rfc822(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_11.txt')
        eq(msg.get_content_type(), 'message/rfc822')
        submsg = self._assert_single_submessage(msg)
        eq(submsg['subject'], 'An enclosed message')
        eq(submsg.get_payload(), 'Here is the body of the message.\n')

    def test_dsn(self):
        eq = self.assertEqual
        # msg 16 is a Delivery Status Notification, see RFC 1894
        msg = self._msgobj('msg_16.txt')
        eq(msg.get_content_type(), 'multipart/report')
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 3)
        # Subpart 1 is a text/plain, human readable section
        subpart = msg.get_payload(0)
        eq(subpart.get_content_type(), 'text/plain')
        # NOTE(review): the indented lines in the literal below must match
        # msg_16.txt byte for byte -- confirm the leading whitespace against
        # the data file.
        eq(subpart.get_payload(), """\
This report relates to a message you sent with the following header fields:

 Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
 Date: Sun, 23 Sep 2001 20:10:55 -0700
 From: "Ian T. Henry" <henryi@oxy.edu>
 To: SoCal Raves <scr@socal-raves.org>
 Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

 Recipient address: jangel1@cougar.noc.ucla.edu
 Reason: recipient reached disk quota

""")
        # Subpart 2 contains the machine parsable DSN information.  It
        # consists of two blocks of headers, represented by two nested Message
        # objects.
        subpart = msg.get_payload(1)
        eq(subpart.get_content_type(), 'message/delivery-status')
        eq(len(subpart.get_payload()), 2)
        # message/delivery-status should treat each block as a bunch of
        # headers, i.e. a bunch of Message objects.
        dsn1 = subpart.get_payload(0)
        self.assertIsInstance(dsn1, Message)
        eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
        eq(dsn1.get_param('dns', header='reporting-mta'), '')
        # Try a missing one <wink>
        eq(dsn1.get_param('nsd', header='reporting-mta'), None)
        dsn2 = subpart.get_payload(1)
        self.assertIsInstance(dsn2, Message)
        eq(dsn2['action'], 'failed')
        eq(dsn2.get_params(header='original-recipient'),
           [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
        eq(dsn2.get_param('rfc822', header='final-recipient'), '')
        # Subpart 3 is the original message
        subpart = msg.get_payload(2)
        eq(subpart.get_content_type(), 'message/rfc822')
        subsubpart = self._assert_single_submessage(subpart)
        eq(subsubpart.get_content_type(), 'text/plain')
        eq(subsubpart['message-id'],
           '<002001c144a6$8752e060$56104586@oxy.edu>')

    def test_epilogue(self):
        # Flattening a hand-built multipart with a preamble and an epilogue
        # must reproduce msg_21.txt exactly.
        with openfile('msg_21.txt') as fp:
            text = fp.read()
        msg = self._two_part_message('End of MIME message\n')
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        self.ndiffAssertEqual(sfp.getvalue(), text)

    def test_no_nl_preamble(self):
        # The preamble has no trailing newline and the epilogue is empty.
        msg = self._two_part_message('')
        self.ndiffAssertEqual(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
""")

    def test_default_type(self):
        self._check_digest_default_types('msg_30.txt')

    def test_default_type_with_explicit_container_type(self):
        self._check_digest_default_types('msg_28.txt')

    def test_default_type_non_parsed(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # Set up container
        container = MIMEMultipart('digest', 'BOUNDARY')
        container.epilogue = ''
        # Set up subparts
        subpart1a = MIMEText('message 1\n')
        subpart2a = MIMEText('message 2\n')
        subpart1 = MIMEMessage(subpart1a)
        subpart2 = MIMEMessage(subpart2a)
        container.attach(subpart1)
        container.attach(subpart2)
        subparts = (subpart1, subpart2)
        for part in subparts:
            eq(part.get_content_type(), 'message/rfc822')
            eq(part.get_default_type(), 'message/rfc822')
        neq(container.as_string(unixfrom=False), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
        # Without explicit headers the reported types must not change.
        for part in subparts:
            del part['content-type']
            del part['mime-version']
        for part in subparts:
            eq(part.get_content_type(), 'message/rfc822')
            eq(part.get_default_type(), 'message/rfc822')
        neq(container.as_string(unixfrom=False), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')

    def test_mime_attachments_in_constructor(self):
        eq = self.assertEqual
        text1 = MIMEText('')
        text2 = MIMEText('')
        msg = MIMEMultipart(_subparts=(text1, text2))
        eq(len(msg.get_payload()), 2)
        eq(msg.get_payload(0), text1)
        eq(msg.get_payload(1), text2)

    def test_default_multipart_constructor(self):
        msg = MIMEMultipart()
        self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002569
Ezio Melottib3aedd42010-11-20 19:04:17 +00002570
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002571# A general test of parser->model->generator idempotency. IOW, read a message
2572# in, parse it into a message object tree, then without touching the tree,
2573# regenerate the plain text. The original text and the transformed text
2574# should be identical. Note: that we ignore the Unix-From since that may
2575# contain a changed date.
class TestIdempotent(TestEmailBase):

    linesep = '\n'

    def _msgobj(self, filename):
        # Return a (parsed message, original text) pair for a test data file.
        with openfile(filename) as fp:
            data = fp.read()
        msg = email.message_from_string(data)
        return msg, data

    def _idempotent(self, msg, text, unixfrom=False):
        # Flatten *msg* with header wrapping disabled and require the
        # output to equal *text*.
        eq = self.ndiffAssertEqual
        s = StringIO()
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg, unixfrom=unixfrom)
        eq(text, s.getvalue())

    def _check_roundtrip(self, filename, **flatten_args):
        # Parse *filename* and check that regenerating it is lossless.
        # Goes through self._msgobj / self._idempotent so that overrides of
        # either method are honoured.
        msg, text = self._msgobj(filename)
        self._idempotent(msg, text, **flatten_args)

    def test_parse_text_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_01.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_params()[1], ('charset', 'us-ascii'))
        eq(msg.get_param('charset'), 'us-ascii')
        eq(msg.preamble, None)
        eq(msg.epilogue, None)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_03.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_params(), None)
        eq(msg.get_param('charset'), None)
        self._idempotent(msg, text)

    def test_simple_multipart(self):
        self._check_roundtrip('msg_04.txt')

    def test_MIME_digest(self):
        self._check_roundtrip('msg_02.txt')

    def test_long_header(self):
        self._check_roundtrip('msg_27.txt')

    def test_MIME_digest_with_part_headers(self):
        self._check_roundtrip('msg_28.txt')

    def test_mixed_with_image(self):
        self._check_roundtrip('msg_06.txt')

    def test_multipart_report(self):
        self._check_roundtrip('msg_05.txt')

    def test_dsn(self):
        self._check_roundtrip('msg_16.txt')

    def test_preamble_epilogue(self):
        self._check_roundtrip('msg_21.txt')

    def test_multipart_one_part(self):
        self._check_roundtrip('msg_23.txt')

    def test_multipart_no_parts(self):
        self._check_roundtrip('msg_24.txt')

    def test_no_start_boundary(self):
        self._check_roundtrip('msg_31.txt')

    def test_rfc2231_charset(self):
        self._check_roundtrip('msg_32.txt')

    def test_more_rfc2231_parameters(self):
        self._check_roundtrip('msg_33.txt')

    def test_text_plain_in_a_multipart_digest(self):
        self._check_roundtrip('msg_34.txt')

    def test_nested_multipart_mixeds(self):
        self._check_roundtrip('msg_12a.txt')

    def test_message_external_body_idempotent(self):
        self._check_roundtrip('msg_36.txt')

    def test_message_delivery_status(self):
        # This one is flattened with the Unix-From line included.
        self._check_roundtrip('msg_43.txt', unixfrom=True)

    def test_message_signed_idempotent(self):
        self._check_roundtrip('msg_45.txt')

    def test_content_type(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_05.txt')
        eq(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(msg.get_params())
        eq(params['report-type'], 'delivery-status')
        eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep)
        eq(msg.epilogue, self.linesep)
        eq(len(msg.get_payload()), 3)
        # Make sure the subparts are what we expect
        msg1 = msg.get_payload(0)
        eq(msg1.get_content_type(), 'text/plain')
        eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg2 = msg.get_payload(1)
        eq(msg2.get_content_type(), 'text/plain')
        eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg3 = msg.get_payload(2)
        eq(msg3.get_content_type(), 'message/rfc822')
        self.assertIsInstance(msg3, Message)
        payload = msg3.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg4 = payload[0]
        self.assertIsInstance(msg4, Message)
        eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep)

    def test_parser(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        eq(msg.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and that
        # that submessage has a type of text/plain
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg1 = payload[0]
        self.assertIsInstance(msg1, Message)
        eq(msg1.get_content_type(), 'text/plain')
        self.assertIsInstance(msg1.get_payload(), str)
        eq(msg1.get_payload(), self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002733
2734
Ezio Melottib3aedd42010-11-20 19:04:17 +00002735
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002736# Test various other bits of the package's functionality
2737class TestMiscellaneous(TestEmailBase):
def test_message_from_string(self):
    # Parsing msg_01.txt from a string and flattening it again must give
    # back the original text.
    with openfile('msg_01.txt') as fp:
        original = fp.read()
    parsed = email.message_from_string(original)
    out = StringIO()
    # Don't wrap/continue long headers since we're trying to test
    # idempotency.
    Generator(out, maxheaderlen=0).flatten(parsed)
    self.assertEqual(original, out.getvalue())
2748
def test_message_from_file(self):
    # Same round trip as the string variant, but parsing from the open
    # file object after rewinding it.
    with openfile('msg_01.txt') as fp:
        original = fp.read()
        fp.seek(0)
        parsed = email.message_from_file(fp)
    out = StringIO()
    # Don't wrap/continue long headers since we're trying to test
    # idempotency.
    Generator(out, maxheaderlen=0).flatten(parsed)
    self.assertEqual(original, out.getvalue())
2760
def test_message_from_string_with_class(self):
    # message_from_string() must build the tree out of the class passed
    # as its second argument, for the root and for every subpart.
    with openfile('msg_01.txt') as fp:
        text = fp.read()

    # Create a subclass
    class MyMessage(Message):
        pass

    msg = email.message_from_string(text, MyMessage)
    self.assertIsInstance(msg, MyMessage)
    # Try something more complicated
    with openfile('msg_02.txt') as fp:
        text = fp.read()
    msg = email.message_from_string(text, MyMessage)
    for subpart in msg.walk():
        self.assertIsInstance(subpart, MyMessage)
2778
def test_message_from_file_with_class(self):
    # message_from_file() must build the tree out of the class passed as
    # its second argument, for the root and for every subpart.

    # Create a subclass
    class MyMessage(Message):
        pass

    with openfile('msg_01.txt') as fp:
        msg = email.message_from_file(fp, MyMessage)
    self.assertIsInstance(msg, MyMessage)
    # Try something more complicated
    with openfile('msg_02.txt') as fp:
        msg = email.message_from_file(fp, MyMessage)
    for subpart in msg.walk():
        self.assertIsInstance(subpart, MyMessage)
2793
def test_custom_message_does_not_require_arguments(self):
    # A Message subclass whose __init__ takes no arguments must still be
    # usable as the message class.
    class MyMessage(Message):
        def __init__(self):
            super().__init__()

    msg = self._str_msg("Subject: test\n\ntest", MyMessage)
    self.assertIsInstance(msg, MyMessage)
2800
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002801 def test__all__(self):
2802 module = __import__('email')
R David Murray1b6c7242012-03-16 22:43:05 -04002803 self.assertEqual(sorted(module.__all__), [
2804 'base64mime', 'charset', 'encoders', 'errors', 'feedparser',
2805 'generator', 'header', 'iterators', 'message',
2806 'message_from_binary_file', 'message_from_bytes',
2807 'message_from_file', 'message_from_string', 'mime', 'parser',
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002808 'quoprimime', 'utils',
2809 ])
2810
2811 def test_formatdate(self):
2812 now = time.time()
2813 self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
2814 time.gmtime(now)[:6])
2815
2816 def test_formatdate_localtime(self):
2817 now = time.time()
2818 self.assertEqual(
2819 utils.parsedate(utils.formatdate(now, localtime=True))[:6],
2820 time.localtime(now)[:6])
2821
2822 def test_formatdate_usegmt(self):
2823 now = time.time()
2824 self.assertEqual(
2825 utils.formatdate(now, localtime=False),
2826 time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
2827 self.assertEqual(
2828 utils.formatdate(now, localtime=False, usegmt=True),
2829 time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
2830
Georg Brandl1aca31e2012-09-22 09:03:56 +02002831 # parsedate and parsedate_tz will become deprecated interfaces someday
2832 def test_parsedate_returns_None_for_invalid_strings(self):
2833 self.assertIsNone(utils.parsedate(''))
2834 self.assertIsNone(utils.parsedate_tz(''))
2835 self.assertIsNone(utils.parsedate('0'))
2836 self.assertIsNone(utils.parsedate_tz('0'))
2837 self.assertIsNone(utils.parsedate('A Complete Waste of Time'))
2838 self.assertIsNone(utils.parsedate_tz('A Complete Waste of Time'))
2839 # Not a part of the spec but, but this has historically worked:
2840 self.assertIsNone(utils.parsedate(None))
2841 self.assertIsNone(utils.parsedate_tz(None))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002842
2843 def test_parsedate_compact(self):
2844 # The FWS after the comma is optional
2845 self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
2846 utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
2847
2848 def test_parsedate_no_dayofweek(self):
2849 eq = self.assertEqual
2850 eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
2851 (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
2852
2853 def test_parsedate_compact_no_dayofweek(self):
2854 eq = self.assertEqual
2855 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
2856 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2857
R. David Murray4a62e892010-12-23 20:35:46 +00002858 def test_parsedate_no_space_before_positive_offset(self):
2859 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
2860 (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
2861
2862 def test_parsedate_no_space_before_negative_offset(self):
2863 # Issue 1155362: we already handled '+' for this case.
2864 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
2865 (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))
2866
2867
R David Murrayaccd1c02011-03-13 20:06:23 -04002868 def test_parsedate_accepts_time_with_dots(self):
2869 eq = self.assertEqual
2870 eq(utils.parsedate_tz('5 Feb 2003 13.47.26 -0800'),
2871 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2872 eq(utils.parsedate_tz('5 Feb 2003 13.47 -0800'),
2873 (2003, 2, 5, 13, 47, 0, 0, 1, -1, -28800))
2874
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002875 def test_parsedate_acceptable_to_time_functions(self):
2876 eq = self.assertEqual
2877 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
2878 t = int(time.mktime(timetup))
2879 eq(time.localtime(t)[:6], timetup[:6])
2880 eq(int(time.strftime('%Y', timetup)), 2003)
2881 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
2882 t = int(time.mktime(timetup[:9]))
2883 eq(time.localtime(t)[:6], timetup[:6])
2884 eq(int(time.strftime('%Y', timetup[:9])), 2003)
2885
Alexander Belopolskya07548e2012-06-21 20:34:09 -04002886 def test_mktime_tz(self):
2887 self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
2888 -1, -1, -1, 0)), 0)
2889 self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
2890 -1, -1, -1, 1234)), -1234)
2891
R. David Murray219d1c82010-08-25 00:45:55 +00002892 def test_parsedate_y2k(self):
2893 """Test for parsing a date with a two-digit year.
2894
2895 Parsing a date with a two-digit year should return the correct
2896 four-digit year. RFC822 allows two-digit years, but RFC2822 (which
2897 obsoletes RFC822) requires four-digit years.
2898
2899 """
2900 self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
2901 utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
2902 self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
2903 utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
2904
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002905 def test_parseaddr_empty(self):
2906 self.assertEqual(utils.parseaddr('<>'), ('', ''))
2907 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
2908
2909 def test_noquote_dump(self):
2910 self.assertEqual(
2911 utils.formataddr(('A Silly Person', 'person@dom.ain')),
2912 'A Silly Person <person@dom.ain>')
2913
2914 def test_escape_dump(self):
2915 self.assertEqual(
2916 utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
R David Murrayb53319f2012-03-14 15:31:47 -04002917 r'"A (Very) Silly Person" <person@dom.ain>')
2918 self.assertEqual(
2919 utils.parseaddr(r'"A \(Very\) Silly Person" <person@dom.ain>'),
2920 ('A (Very) Silly Person', 'person@dom.ain'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002921 a = r'A \(Special\) Person'
2922 b = 'person@dom.ain'
2923 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2924
2925 def test_escape_backslashes(self):
2926 self.assertEqual(
2927 utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
2928 r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
2929 a = r'Arthur \Backslash\ Foobar'
2930 b = 'person@dom.ain'
2931 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2932
R David Murray8debacb2011-04-06 09:35:57 -04002933 def test_quotes_unicode_names(self):
2934 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2935 name = "H\u00e4ns W\u00fcrst"
2936 addr = 'person@dom.ain'
2937 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
2938 latin1_quopri = "=?iso-8859-1?q?H=E4ns_W=FCrst?= <person@dom.ain>"
2939 self.assertEqual(utils.formataddr((name, addr)), utf8_base64)
2940 self.assertEqual(utils.formataddr((name, addr), 'iso-8859-1'),
2941 latin1_quopri)
2942
2943 def test_accepts_any_charset_like_object(self):
2944 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2945 name = "H\u00e4ns W\u00fcrst"
2946 addr = 'person@dom.ain'
2947 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
2948 foobar = "FOOBAR"
2949 class CharsetMock:
2950 def header_encode(self, string):
2951 return foobar
2952 mock = CharsetMock()
2953 mock_expected = "%s <%s>" % (foobar, addr)
2954 self.assertEqual(utils.formataddr((name, addr), mock), mock_expected)
2955 self.assertEqual(utils.formataddr((name, addr), Charset('utf-8')),
2956 utf8_base64)
2957
2958 def test_invalid_charset_like_object_raises_error(self):
2959 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2960 name = "H\u00e4ns W\u00fcrst"
2961 addr = 'person@dom.ain'
2962 # A object without a header_encode method:
2963 bad_charset = object()
2964 self.assertRaises(AttributeError, utils.formataddr, (name, addr),
2965 bad_charset)
2966
2967 def test_unicode_address_raises_error(self):
2968 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2969 addr = 'pers\u00f6n@dom.in'
2970 self.assertRaises(UnicodeError, utils.formataddr, (None, addr))
2971 self.assertRaises(UnicodeError, utils.formataddr, ("Name", addr))
2972
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002973 def test_name_with_dot(self):
2974 x = 'John X. Doe <jxd@example.com>'
2975 y = '"John X. Doe" <jxd@example.com>'
2976 a, b = ('John X. Doe', 'jxd@example.com')
2977 self.assertEqual(utils.parseaddr(x), (a, b))
2978 self.assertEqual(utils.parseaddr(y), (a, b))
2979 # formataddr() quotes the name if there's a dot in it
2980 self.assertEqual(utils.formataddr((a, b)), y)
2981
R. David Murray5397e862010-10-02 15:58:26 +00002982 def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
2983 # issue 10005. Note that in the third test the second pair of
2984 # backslashes is not actually a quoted pair because it is not inside a
2985 # comment or quoted string: the address being parsed has a quoted
2986 # string containing a quoted backslash, followed by 'example' and two
2987 # backslashes, followed by another quoted string containing a space and
2988 # the word 'example'. parseaddr copies those two backslashes
2989 # literally. Per rfc5322 this is not technically correct since a \ may
2990 # not appear in an address outside of a quoted string. It is probably
2991 # a sensible Postel interpretation, though.
2992 eq = self.assertEqual
2993 eq(utils.parseaddr('""example" example"@example.com'),
2994 ('', '""example" example"@example.com'))
2995 eq(utils.parseaddr('"\\"example\\" example"@example.com'),
2996 ('', '"\\"example\\" example"@example.com'))
2997 eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
2998 ('', '"\\\\"example\\\\" example"@example.com'))
2999
R. David Murray63563cd2010-12-18 18:25:38 +00003000 def test_parseaddr_preserves_spaces_in_local_part(self):
3001 # issue 9286. A normal RFC5322 local part should not contain any
3002 # folding white space, but legacy local parts can (they are a sequence
3003 # of atoms, not dotatoms). On the other hand we strip whitespace from
3004 # before the @ and around dots, on the assumption that the whitespace
3005 # around the punctuation is a mistake in what would otherwise be
3006 # an RFC5322 local part. Leading whitespace is, usual, stripped as well.
3007 self.assertEqual(('', "merwok wok@xample.com"),
3008 utils.parseaddr("merwok wok@xample.com"))
3009 self.assertEqual(('', "merwok wok@xample.com"),
3010 utils.parseaddr("merwok wok@xample.com"))
3011 self.assertEqual(('', "merwok wok@xample.com"),
3012 utils.parseaddr(" merwok wok @xample.com"))
3013 self.assertEqual(('', 'merwok"wok" wok@xample.com'),
3014 utils.parseaddr('merwok"wok" wok@xample.com'))
3015 self.assertEqual(('', 'merwok.wok.wok@xample.com'),
3016 utils.parseaddr('merwok. wok . wok@xample.com'))
3017
R David Murrayb53319f2012-03-14 15:31:47 -04003018 def test_formataddr_does_not_quote_parens_in_quoted_string(self):
3019 addr = ("'foo@example.com' (foo@example.com)",
3020 'foo@example.com')
3021 addrstr = ('"\'foo@example.com\' '
3022 '(foo@example.com)" <foo@example.com>')
3023 self.assertEqual(utils.parseaddr(addrstr), addr)
3024 self.assertEqual(utils.formataddr(addr), addrstr)
3025
3026
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003027 def test_multiline_from_comment(self):
3028 x = """\
3029Foo
3030\tBar <foo@example.com>"""
3031 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
3032
3033 def test_quote_dump(self):
3034 self.assertEqual(
3035 utils.formataddr(('A Silly; Person', 'person@dom.ain')),
3036 r'"A Silly; Person" <person@dom.ain>')
3037
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003038 def test_charset_richcomparisons(self):
3039 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003040 ne = self.assertNotEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003041 cset1 = Charset()
3042 cset2 = Charset()
3043 eq(cset1, 'us-ascii')
3044 eq(cset1, 'US-ASCII')
3045 eq(cset1, 'Us-AsCiI')
3046 eq('us-ascii', cset1)
3047 eq('US-ASCII', cset1)
3048 eq('Us-AsCiI', cset1)
3049 ne(cset1, 'usascii')
3050 ne(cset1, 'USASCII')
3051 ne(cset1, 'UsAsCiI')
3052 ne('usascii', cset1)
3053 ne('USASCII', cset1)
3054 ne('UsAsCiI', cset1)
3055 eq(cset1, cset2)
3056 eq(cset2, cset1)
3057
3058 def test_getaddresses(self):
3059 eq = self.assertEqual
3060 eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
3061 'Bud Person <bperson@dom.ain>']),
3062 [('Al Person', 'aperson@dom.ain'),
3063 ('Bud Person', 'bperson@dom.ain')])
3064
3065 def test_getaddresses_nasty(self):
3066 eq = self.assertEqual
3067 eq(utils.getaddresses(['foo: ;']), [('', '')])
3068 eq(utils.getaddresses(
3069 ['[]*-- =~$']),
3070 [('', ''), ('', ''), ('', '*--')])
3071 eq(utils.getaddresses(
3072 ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
3073 [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
3074
3075 def test_getaddresses_embedded_comment(self):
3076 """Test proper handling of a nested comment"""
3077 eq = self.assertEqual
3078 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
3079 eq(addrs[0][1], 'foo@bar.com')
3080
3081 def test_utils_quote_unquote(self):
3082 eq = self.assertEqual
3083 msg = Message()
3084 msg.add_header('content-disposition', 'attachment',
3085 filename='foo\\wacky"name')
3086 eq(msg.get_filename(), 'foo\\wacky"name')
3087
3088 def test_get_body_encoding_with_bogus_charset(self):
3089 charset = Charset('not a charset')
3090 self.assertEqual(charset.get_body_encoding(), 'base64')
3091
3092 def test_get_body_encoding_with_uppercase_charset(self):
3093 eq = self.assertEqual
3094 msg = Message()
3095 msg['Content-Type'] = 'text/plain; charset=UTF-8'
3096 eq(msg['content-type'], 'text/plain; charset=UTF-8')
3097 charsets = msg.get_charsets()
3098 eq(len(charsets), 1)
3099 eq(charsets[0], 'utf-8')
3100 charset = Charset(charsets[0])
3101 eq(charset.get_body_encoding(), 'base64')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00003102 msg.set_payload(b'hello world', charset=charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003103 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
3104 eq(msg.get_payload(decode=True), b'hello world')
3105 eq(msg['content-transfer-encoding'], 'base64')
3106 # Try another one
3107 msg = Message()
3108 msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
3109 charsets = msg.get_charsets()
3110 eq(len(charsets), 1)
3111 eq(charsets[0], 'us-ascii')
3112 charset = Charset(charsets[0])
3113 eq(charset.get_body_encoding(), encoders.encode_7or8bit)
3114 msg.set_payload('hello world', charset=charset)
3115 eq(msg.get_payload(), 'hello world')
3116 eq(msg['content-transfer-encoding'], '7bit')
3117
3118 def test_charsets_case_insensitive(self):
3119 lc = Charset('us-ascii')
3120 uc = Charset('US-ASCII')
3121 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
3122
3123 def test_partial_falls_inside_message_delivery_status(self):
3124 eq = self.ndiffAssertEqual
3125 # The Parser interface provides chunks of data to FeedParser in 8192
3126 # byte gulps. SF bug #1076485 found one of those chunks inside
3127 # message/delivery-status header block, which triggered an
3128 # unreadline() of NeedMoreData.
3129 msg = self._msgobj('msg_43.txt')
3130 sfp = StringIO()
3131 iterators._structure(msg, sfp)
3132 eq(sfp.getvalue(), """\
3133multipart/report
3134 text/plain
3135 message/delivery-status
3136 text/plain
3137 text/plain
3138 text/plain
3139 text/plain
3140 text/plain
3141 text/plain
3142 text/plain
3143 text/plain
3144 text/plain
3145 text/plain
3146 text/plain
3147 text/plain
3148 text/plain
3149 text/plain
3150 text/plain
3151 text/plain
3152 text/plain
3153 text/plain
3154 text/plain
3155 text/plain
3156 text/plain
3157 text/plain
3158 text/plain
3159 text/plain
3160 text/plain
3161 text/plain
3162 text/rfc822-headers
3163""")
3164
R. David Murraya0b44b52010-12-02 21:47:19 +00003165 def test_make_msgid_domain(self):
3166 self.assertEqual(
3167 email.utils.make_msgid(domain='testdomain-string')[-19:],
3168 '@testdomain-string>')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003169
R David Murraye67c6c52013-03-07 16:38:03 -05003170 def test_Generator_linend(self):
3171 # Issue 14645.
3172 with openfile('msg_26.txt', newline='\n') as f:
3173 msgtxt = f.read()
3174 msgtxt_nl = msgtxt.replace('\r\n', '\n')
3175 msg = email.message_from_string(msgtxt)
3176 s = StringIO()
3177 g = email.generator.Generator(s)
3178 g.flatten(msg)
3179 self.assertEqual(s.getvalue(), msgtxt_nl)
3180
3181 def test_BytesGenerator_linend(self):
3182 # Issue 14645.
3183 with openfile('msg_26.txt', newline='\n') as f:
3184 msgtxt = f.read()
3185 msgtxt_nl = msgtxt.replace('\r\n', '\n')
3186 msg = email.message_from_string(msgtxt_nl)
3187 s = BytesIO()
3188 g = email.generator.BytesGenerator(s)
3189 g.flatten(msg, linesep='\r\n')
3190 self.assertEqual(s.getvalue().decode('ascii'), msgtxt)
3191
3192 def test_BytesGenerator_linend_with_non_ascii(self):
3193 # Issue 14645.
3194 with openfile('msg_26.txt', 'rb') as f:
3195 msgtxt = f.read()
3196 msgtxt = msgtxt.replace(b'with attachment', b'fo\xf6')
3197 msgtxt_nl = msgtxt.replace(b'\r\n', b'\n')
3198 msg = email.message_from_bytes(msgtxt_nl)
3199 s = BytesIO()
3200 g = email.generator.BytesGenerator(s)
3201 g.flatten(msg, linesep='\r\n')
3202 self.assertEqual(s.getvalue(), msgtxt)
3203
Ezio Melottib3aedd42010-11-20 19:04:17 +00003204
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003205# Test the iterator/generators
class TestIterators(TestEmailBase):
    # Covers iterators.body_line_iterator / typed_subpart_iterator and the
    # feed parser's BufferedSubFile line buffer.

    def test_body_line_iterator(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # First a simple non-multipart message
        msg = self._msgobj('msg_01.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 6)
        neq(EMPTYSTRING.join(lines), msg.get_payload())
        # Now a more complicated multipart
        msg = self._msgobj('msg_02.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 43)
        # msg_19.txt is compared against the concatenated body lines of
        # msg_02.txt.
        with openfile('msg_19.txt') as fp:
            neq(EMPTYSTRING.join(lines), fp.read())

    def test_typed_subpart_iterator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_04.txt')
        # Collect the payload of every subpart whose main type is 'text'.
        payloads = [subpart.get_payload() for subpart in
                    iterators.typed_subpart_iterator(msg, 'text')]
        eq(len(payloads), 2)
        eq(EMPTYSTRING.join(payloads), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_03.txt')
        # Same as above, but filtering on both main type and subtype.
        payloads = [subpart.get_payload() for subpart in
                    iterators.typed_subpart_iterator(msg, 'text', 'plain')]
        eq(len(payloads), 1)
        eq(EMPTYSTRING.join(payloads), """\

Hi,

Do you like this message?

-Me
""")

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
           line endings.  A CR ending one push() followed by a LF starting
           the next push() added an empty line.
        '''
        # Each entry is (chunk pushed, number of complete lines that become
        # readable right after that push).
        imt = [
            ("a\r \n", 2),
            ("b", 0),
            ("c\n", 1),
            ("", 0),
            ("d\r\n", 1),
            ("e\r", 0),
            ("\nf", 1),
            ("\r\n", 1),
        ]
        from email.feedparser import BufferedSubFile, NeedMoreData
        bsf = BufferedSubFile()
        om = []
        nt = 0
        for il, n in imt:
            bsf.push(il)
            nt += n
            n1 = 0
            # Drain every line that is available after this push.
            while True:
                ol = bsf.readline()
                # NeedMoreData is a sentinel object: compare by identity.
                if ol is NeedMoreData:
                    break
                om.append(ol)
                n1 += 1
            # assertEqual (not assertTrue(a == b)) so a failure reports both
            # values.
            self.assertEqual(n, n1)
        self.assertEqual(len(om), nt)
        # Nothing pushed may be lost or duplicated.
        self.assertEqual(''.join([il for il, n in imt]), ''.join(om))
3292
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003293
Ezio Melottib3aedd42010-11-20 19:04:17 +00003294
class TestParsers(TestEmailBase):
    # Exercises Parser, HeaderParser, BytesHeaderParser and the
    # message_from_* helpers on fixture files and inline messages.

    def test_header_parser(self):
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt') as fp:
            msg = HeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        # The content type says multipart, but a header-only parse leaves
        # the payload as one string.
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)

    def test_bytes_header_parser(self):
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt', 'rb') as fp:
            msg = email.parser.BytesHeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        self.assertFalse(msg.is_multipart())
        # The payload is str by default and bytes when decode=True.
        self.assertIsInstance(msg.get_payload(), str)
        self.assertIsInstance(msg.get_payload(decode=True), bytes)

    def test_whitespace_continuation(self):
        eq = self.assertEqual
        # This message contains a line after the Subject: header that has only
        # whitespace, but it is not empty!
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: the next line has a space on it
\x20
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam

Here's the message body
""")
        # The whitespace-only line is folded into the Subject value.
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        eq = self.assertEqual
        # Like the previous test, but the subject line is the last
        # header.
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam
Subject: the next line has a space on it
\x20

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        eq = self.assertEqual
        # newline='\n' stops the text layer from translating the CRLFs.
        with openfile('msg_26.txt', newline='\n') as fp:
            msg = Parser().parse(fp)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'text/plain')
        eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'application/riscos')

    def test_crlf_flatten(self):
        # Using newline='\n' preserves the crlfs in this input file.
        with openfile('msg_26.txt', newline='\n') as fp:
            text = fp.read()
        msg = email.message_from_string(text)
        s = StringIO()
        g = Generator(s)
        g.flatten(msg, linesep='\r\n')
        self.assertEqual(s.getvalue(), text)

    # Show complete diffs when an assertion in this class fails.
    maxDiff = None

    def test_multipart_digest_with_extra_mime_headers(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        eq(msg.is_multipart(), 1)
        eq(len(msg.get_payload()), 2)
        # Both message/rfc822 parts have the same shape; only the body text
        # of the wrapped text/plain message differs.
        for index, body in enumerate(('message 1\n', 'message 2\n')):
            part = msg.get_payload(index)
            eq(part.get_content_type(), 'message/rfc822')
            eq(part.is_multipart(), 1)
            eq(len(part.get_payload()), 1)
            inner = part.get_payload(0)
            eq(inner.is_multipart(), 0)
            eq(inner.get_content_type(), 'text/plain')
            neq(inner.get_payload(), body)

    def test_three_lines(self):
        # A bug report by Andrew McNamara
        # Note the unterminated angle bracket in From and the missing final
        # newline: the last header must still be parsed.
        lines = ['From: Andrew Person <aperson@dom.ain',
                 'Subject: Test',
                 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
        msg = email.message_from_string(NL.join(lines))
        self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        eq = self.assertEqual
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        value1 = 'text'
        value2 = 'more text'
        m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            value1, value2)
        msg = email.message_from_string(m)
        # Neither value may keep a trailing CR or LF.
        eq(msg.get('Header'), value1)
        eq(msg.get('Next-Header'), value2)

    def test_rfc2822_header_syntax(self):
        eq = self.assertEqual
        # Header names made of unusual printable characters are accepted.
        m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg), 3)
        eq(sorted(field for field in msg), ['!"#QUX;~', '>From', 'From'])
        eq(msg.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        eq = self.assertEqual
        # The first line has a space before any colon, so it is not a
        # header line, and no headers are collected at all.
        m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg.keys()), 0)

    def test_rfc2822_one_character_header(self):
        eq = self.assertEqual
        m = 'A: first header\nB: second header\nCC: third header\n\nbody'
        msg = email.message_from_string(m)
        # sorted() instead of keys() followed by an in-place sort().
        eq(sorted(msg.keys()), ['A', 'B', 'CC'])
        eq(msg.get_payload(), 'body')

    def test_CRLFLF_at_end_of_part(self):
        # issue 5610: feedparser should not eat two chars from body part ending
        # with "\r\n\n".
        m = (
            "From: foo@bar.com\n"
            "To: baz\n"
            "Mime-Version: 1.0\n"
            "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
            "\n"
            "--BOUNDARY\n"
            "Content-Type: text/plain\n"
            "\n"
            "body ending with CRLF newline\r\n"
            "\n"
            "--BOUNDARY--\n"
        )
        msg = email.message_from_string(m)
        self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003470
Ezio Melottib3aedd42010-11-20 19:04:17 +00003471
R. David Murray96fd54e2010-10-08 15:55:28 +00003472class Test8BitBytesHandling(unittest.TestCase):
3473 # In Python3 all input is string, but that doesn't work if the actual input
3474 # uses an 8bit transfer encoding. To hack around that, in email 5.1 we
3475 # decode byte streams using the surrogateescape error handler, and
3476 # reconvert to binary at appropriate places if we detect surrogates. This
3477 # doesn't allow us to transform headers with 8bit bytes (they get munged),
3478 # but it does allow us to parse and preserve them, and to decode body
3479 # parts that use an 8bit CTE.
3480
3481 bodytest_msg = textwrap.dedent("""\
3482 From: foo@bar.com
3483 To: baz
3484 Mime-Version: 1.0
3485 Content-Type: text/plain; charset={charset}
3486 Content-Transfer-Encoding: {cte}
3487
3488 {bodyline}
3489 """)
3490
3491 def test_known_8bit_CTE(self):
3492 m = self.bodytest_msg.format(charset='utf-8',
3493 cte='8bit',
3494 bodyline='pöstal').encode('utf-8')
3495 msg = email.message_from_bytes(m)
3496 self.assertEqual(msg.get_payload(), "pöstal\n")
3497 self.assertEqual(msg.get_payload(decode=True),
3498 "pöstal\n".encode('utf-8'))
3499
3500 def test_unknown_8bit_CTE(self):
3501 m = self.bodytest_msg.format(charset='notavalidcharset',
3502 cte='8bit',
3503 bodyline='pöstal').encode('utf-8')
3504 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00003505 self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n")
R. David Murray96fd54e2010-10-08 15:55:28 +00003506 self.assertEqual(msg.get_payload(decode=True),
3507 "pöstal\n".encode('utf-8'))
3508
3509 def test_8bit_in_quopri_body(self):
3510 # This is non-RFC compliant data...without 'decode' the library code
3511 # decodes the body using the charset from the headers, and because the
3512 # source byte really is utf-8 this works. This is likely to fail
3513 # against real dirty data (ie: produce mojibake), but the data is
3514 # invalid anyway so it is as good a guess as any. But this means that
3515 # this test just confirms the current behavior; that behavior is not
3516 # necessarily the best possible behavior. With 'decode' it is
3517 # returning the raw bytes, so that test should be of correct behavior,
3518 # or at least produce the same result that email4 did.
3519 m = self.bodytest_msg.format(charset='utf-8',
3520 cte='quoted-printable',
3521 bodyline='p=C3=B6stál').encode('utf-8')
3522 msg = email.message_from_bytes(m)
3523 self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
3524 self.assertEqual(msg.get_payload(decode=True),
3525 'pöstál\n'.encode('utf-8'))
3526
3527 def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
3528 # This is similar to the previous test, but proves that if the 8bit
3529 # byte is undecodeable in the specified charset, it gets replaced
3530 # by the unicode 'unknown' character. Again, this may or may not
3531 # be the ideal behavior. Note that if decode=False none of the
3532 # decoders will get involved, so this is the only test we need
3533 # for this behavior.
3534 m = self.bodytest_msg.format(charset='ascii',
3535 cte='quoted-printable',
3536 bodyline='p=C3=B6stál').encode('utf-8')
3537 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00003538 self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
R. David Murray96fd54e2010-10-08 15:55:28 +00003539 self.assertEqual(msg.get_payload(decode=True),
3540 'pöstál\n'.encode('utf-8'))
3541
R David Murray80e0aee2012-05-27 21:23:34 -04003542 # test_defect_handling:test_invalid_chars_in_base64_payload
R. David Murray96fd54e2010-10-08 15:55:28 +00003543 def test_8bit_in_base64_body(self):
R David Murray80e0aee2012-05-27 21:23:34 -04003544 # If we get 8bit bytes in a base64 body, we can just ignore them
3545 # as being outside the base64 alphabet and decode anyway. But
3546 # we register a defect.
R. David Murray96fd54e2010-10-08 15:55:28 +00003547 m = self.bodytest_msg.format(charset='utf-8',
3548 cte='base64',
3549 bodyline='cMO2c3RhbAá=').encode('utf-8')
3550 msg = email.message_from_bytes(m)
3551 self.assertEqual(msg.get_payload(decode=True),
R David Murray80e0aee2012-05-27 21:23:34 -04003552 'pöstal'.encode('utf-8'))
3553 self.assertIsInstance(msg.defects[0],
3554 errors.InvalidBase64CharactersDefect)
R. David Murray96fd54e2010-10-08 15:55:28 +00003555
3556 def test_8bit_in_uuencode_body(self):
3557 # Sticking an 8bit byte in a uuencode block makes it undecodable by
3558 # normal means, so the block is returned undecoded, but as bytes.
3559 m = self.bodytest_msg.format(charset='utf-8',
3560 cte='uuencode',
3561 bodyline='<,.V<W1A; á ').encode('utf-8')
3562 msg = email.message_from_bytes(m)
3563 self.assertEqual(msg.get_payload(decode=True),
3564 '<,.V<W1A; á \n'.encode('utf-8'))
3565
3566
R. David Murray92532142011-01-07 23:25:30 +00003567 headertest_headers = (
3568 ('From: foo@bar.com', ('From', 'foo@bar.com')),
3569 ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
3570 ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
3571 '\tJean de Baddie',
3572 ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
3573 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
3574 ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
3575 ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
3576 )
3577 headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) +
3578 '\nYes, they are flying.\n').encode('utf-8')
R. David Murray96fd54e2010-10-08 15:55:28 +00003579
3580 def test_get_8bit_header(self):
3581 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003582 self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz')
3583 self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz')
R. David Murray96fd54e2010-10-08 15:55:28 +00003584
3585 def test_print_8bit_headers(self):
3586 msg = email.message_from_bytes(self.headertest_msg)
3587 self.assertEqual(str(msg),
R. David Murray92532142011-01-07 23:25:30 +00003588 textwrap.dedent("""\
3589 From: {}
3590 To: {}
3591 Subject: {}
3592 From: {}
3593
3594 Yes, they are flying.
3595 """).format(*[expected[1] for (_, expected) in
3596 self.headertest_headers]))
R. David Murray96fd54e2010-10-08 15:55:28 +00003597
3598 def test_values_with_8bit_headers(self):
3599 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003600 self.assertListEqual([str(x) for x in msg.values()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003601 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003602 'b\uFFFD\uFFFDz',
3603 'Maintenant je vous pr\uFFFD\uFFFDsente mon '
3604 'coll\uFFFD\uFFFDgue, le pouf '
3605 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
R. David Murray96fd54e2010-10-08 15:55:28 +00003606 '\tJean de Baddie',
R. David Murray92532142011-01-07 23:25:30 +00003607 "g\uFFFD\uFFFDst"])
R. David Murray96fd54e2010-10-08 15:55:28 +00003608
3609 def test_items_with_8bit_headers(self):
3610 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003611 self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003612 [('From', 'foo@bar.com'),
R. David Murray92532142011-01-07 23:25:30 +00003613 ('To', 'b\uFFFD\uFFFDz'),
3614 ('Subject', 'Maintenant je vous '
3615 'pr\uFFFD\uFFFDsente '
3616 'mon coll\uFFFD\uFFFDgue, le pouf '
3617 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
3618 '\tJean de Baddie'),
3619 ('From', 'g\uFFFD\uFFFDst')])
R. David Murray96fd54e2010-10-08 15:55:28 +00003620
3621 def test_get_all_with_8bit_headers(self):
3622 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003623 self.assertListEqual([str(x) for x in msg.get_all('from')],
R. David Murray96fd54e2010-10-08 15:55:28 +00003624 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003625 'g\uFFFD\uFFFDst'])
R. David Murray96fd54e2010-10-08 15:55:28 +00003626
R David Murraya2150232011-03-16 21:11:23 -04003627 def test_get_content_type_with_8bit(self):
3628 msg = email.message_from_bytes(textwrap.dedent("""\
3629 Content-Type: text/pl\xA7in; charset=utf-8
3630 """).encode('latin-1'))
3631 self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin")
3632 self.assertEqual(msg.get_content_maintype(), "text")
3633 self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin")
3634
R David Murray97f43c02012-06-24 05:03:27 -04003635 # test_headerregistry.TestContentTypeHeader.non_ascii_in_params
R David Murraya2150232011-03-16 21:11:23 -04003636 def test_get_params_with_8bit(self):
3637 msg = email.message_from_bytes(
3638 'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1'))
3639 self.assertEqual(msg.get_params(header='x-header'),
3640 [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])
3641 self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne')
3642 # XXX: someday you might be able to get 'b\xa7r', for now you can't.
3643 self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None)
3644
R David Murray97f43c02012-06-24 05:03:27 -04003645 # test_headerregistry.TestContentTypeHeader.non_ascii_in_rfc2231_value
R David Murraya2150232011-03-16 21:11:23 -04003646 def test_get_rfc2231_params_with_8bit(self):
3647 msg = email.message_from_bytes(textwrap.dedent("""\
3648 Content-Type: text/plain; charset=us-ascii;
3649 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3650 ).encode('latin-1'))
3651 self.assertEqual(msg.get_param('title'),
3652 ('us-ascii', 'en', 'This is not f\uFFFDn'))
3653
3654 def test_set_rfc2231_params_with_8bit(self):
3655 msg = email.message_from_bytes(textwrap.dedent("""\
3656 Content-Type: text/plain; charset=us-ascii;
3657 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3658 ).encode('latin-1'))
3659 msg.set_param('title', 'test')
3660 self.assertEqual(msg.get_param('title'), 'test')
3661
3662 def test_del_rfc2231_params_with_8bit(self):
3663 msg = email.message_from_bytes(textwrap.dedent("""\
3664 Content-Type: text/plain; charset=us-ascii;
3665 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3666 ).encode('latin-1'))
3667 msg.del_param('title')
3668 self.assertEqual(msg.get_param('title'), None)
3669 self.assertEqual(msg.get_content_maintype(), 'text')
3670
3671 def test_get_payload_with_8bit_cte_header(self):
3672 msg = email.message_from_bytes(textwrap.dedent("""\
3673 Content-Transfer-Encoding: b\xa7se64
3674 Content-Type: text/plain; charset=latin-1
3675
3676 payload
3677 """).encode('latin-1'))
3678 self.assertEqual(msg.get_payload(), 'payload\n')
3679 self.assertEqual(msg.get_payload(decode=True), b'payload\n')
3680
R. David Murray96fd54e2010-10-08 15:55:28 +00003681 non_latin_bin_msg = textwrap.dedent("""\
3682 From: foo@bar.com
3683 To: báz
3684 Subject: Maintenant je vous présente mon collègue, le pouf célèbre
3685 \tJean de Baddie
3686 Mime-Version: 1.0
3687 Content-Type: text/plain; charset="utf-8"
3688 Content-Transfer-Encoding: 8bit
3689
3690 Да, они летят.
3691 """).encode('utf-8')
3692
3693 def test_bytes_generator(self):
3694 msg = email.message_from_bytes(self.non_latin_bin_msg)
3695 out = BytesIO()
3696 email.generator.BytesGenerator(out).flatten(msg)
3697 self.assertEqual(out.getvalue(), self.non_latin_bin_msg)
3698
R. David Murray7372a072011-01-26 21:21:32 +00003699 def test_bytes_generator_handles_None_body(self):
3700 #Issue 11019
3701 msg = email.message.Message()
3702 out = BytesIO()
3703 email.generator.BytesGenerator(out).flatten(msg)
3704 self.assertEqual(out.getvalue(), b"\n")
3705
R. David Murray92532142011-01-07 23:25:30 +00003706 non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\
R. David Murray96fd54e2010-10-08 15:55:28 +00003707 From: foo@bar.com
R. David Murray92532142011-01-07 23:25:30 +00003708 To: =?unknown-8bit?q?b=C3=A1z?=
3709 Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=
3710 =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=
3711 =?unknown-8bit?q?_Jean_de_Baddie?=
R. David Murray96fd54e2010-10-08 15:55:28 +00003712 Mime-Version: 1.0
3713 Content-Type: text/plain; charset="utf-8"
3714 Content-Transfer-Encoding: base64
3715
3716 0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
3717 """)
3718
3719 def test_generator_handles_8bit(self):
3720 msg = email.message_from_bytes(self.non_latin_bin_msg)
3721 out = StringIO()
3722 email.generator.Generator(out).flatten(msg)
R. David Murray92532142011-01-07 23:25:30 +00003723 self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped)
R. David Murray96fd54e2010-10-08 15:55:28 +00003724
3725 def test_bytes_generator_with_unix_from(self):
3726 # The unixfrom contains a current date, so we can't check it
3727 # literally. Just make sure the first word is 'From' and the
3728 # rest of the message matches the input.
3729 msg = email.message_from_bytes(self.non_latin_bin_msg)
3730 out = BytesIO()
3731 email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
3732 lines = out.getvalue().split(b'\n')
3733 self.assertEqual(lines[0].split()[0], b'From')
3734 self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)
3735
R. David Murray92532142011-01-07 23:25:30 +00003736 non_latin_bin_msg_as7bit = non_latin_bin_msg_as7bit_wrapped.split('\n')
3737 non_latin_bin_msg_as7bit[2:4] = [
3738 'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
3739 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
3740 non_latin_bin_msg_as7bit = '\n'.join(non_latin_bin_msg_as7bit)
3741
R. David Murray96fd54e2010-10-08 15:55:28 +00003742 def test_message_from_binary_file(self):
3743 fn = 'test.msg'
3744 self.addCleanup(unlink, fn)
3745 with open(fn, 'wb') as testfile:
3746 testfile.write(self.non_latin_bin_msg)
Brett Cannon384917a2010-10-29 23:08:36 +00003747 with open(fn, 'rb') as testfile:
3748 m = email.parser.BytesParser().parse(testfile)
R. David Murray96fd54e2010-10-08 15:55:28 +00003749 self.assertEqual(str(m), self.non_latin_bin_msg_as7bit)
3750
3751 latin_bin_msg = textwrap.dedent("""\
3752 From: foo@bar.com
3753 To: Dinsdale
3754 Subject: Nudge nudge, wink, wink
3755 Mime-Version: 1.0
3756 Content-Type: text/plain; charset="latin-1"
3757 Content-Transfer-Encoding: 8bit
3758
3759 oh là là, know what I mean, know what I mean?
3760 """).encode('latin-1')
3761
3762 latin_bin_msg_as7bit = textwrap.dedent("""\
3763 From: foo@bar.com
3764 To: Dinsdale
3765 Subject: Nudge nudge, wink, wink
3766 Mime-Version: 1.0
3767 Content-Type: text/plain; charset="iso-8859-1"
3768 Content-Transfer-Encoding: quoted-printable
3769
3770 oh l=E0 l=E0, know what I mean, know what I mean?
3771 """)
3772
3773 def test_string_generator_reencodes_to_quopri_when_appropriate(self):
3774 m = email.message_from_bytes(self.latin_bin_msg)
3775 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3776
3777 def test_decoded_generator_emits_unicode_body(self):
3778 m = email.message_from_bytes(self.latin_bin_msg)
3779 out = StringIO()
3780 email.generator.DecodedGenerator(out).flatten(m)
3781 #DecodedHeader output contains an extra blank line compared
3782 #to the input message. RDM: not sure if this is a bug or not,
3783 #but it is not specific to the 8bit->7bit conversion.
3784 self.assertEqual(out.getvalue(),
3785 self.latin_bin_msg.decode('latin-1')+'\n')
3786
3787 def test_bytes_feedparser(self):
3788 bfp = email.feedparser.BytesFeedParser()
3789 for i in range(0, len(self.latin_bin_msg), 10):
3790 bfp.feed(self.latin_bin_msg[i:i+10])
3791 m = bfp.close()
3792 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3793
def test_crlf_flatten(self):
    # Flattening with linesep='\r\n' must reproduce the bytes of
    # msg_26.txt exactly (presumably a CRLF-terminated sample; confirm
    # against the data file).
    with openfile('msg_26.txt', 'rb') as fp:
        original = fp.read()
    parsed = email.message_from_bytes(original)
    sink = BytesIO()
    email.generator.BytesGenerator(sink).flatten(parsed, linesep='\r\n')
    self.assertEqual(sink.getvalue(), original)
R David Murrayc5c14722011-04-06 08:13:02 -04003802
3803 def test_8bit_multipart(self):
3804 # Issue 11605
3805 source = textwrap.dedent("""\
3806 Date: Fri, 18 Mar 2011 17:15:43 +0100
3807 To: foo@example.com
3808 From: foodwatch-Newsletter <bar@example.com>
3809 Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System
3810 Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain>
3811 MIME-Version: 1.0
3812 Content-Type: multipart/alternative;
3813 boundary="b1_76a486bee62b0d200f33dc2ca08220ad"
3814
3815 --b1_76a486bee62b0d200f33dc2ca08220ad
3816 Content-Type: text/plain; charset="utf-8"
3817 Content-Transfer-Encoding: 8bit
3818
3819 Guten Tag, ,
3820
3821 mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die
3822 Nachrichten aus Japan.
3823
3824
3825 --b1_76a486bee62b0d200f33dc2ca08220ad
3826 Content-Type: text/html; charset="utf-8"
3827 Content-Transfer-Encoding: 8bit
3828
3829 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
3830 "http://www.w3.org/TR/html4/loose.dtd">
3831 <html lang="de">
3832 <head>
3833 <title>foodwatch - Newsletter</title>
3834 </head>
3835 <body>
3836 <p>mit gro&szlig;er Betroffenheit verfolgen auch wir im foodwatch-Team
3837 die Nachrichten aus Japan.</p>
3838 </body>
3839 </html>
3840 --b1_76a486bee62b0d200f33dc2ca08220ad--
3841
3842 """).encode('utf-8')
3843 msg = email.message_from_bytes(source)
3844 s = BytesIO()
3845 g = email.generator.BytesGenerator(s)
3846 g.flatten(msg)
3847 self.assertEqual(s.getvalue(), source)
3848
R David Murray9fd170e2012-03-14 14:05:03 -04003849 def test_bytes_generator_b_encoding_linesep(self):
3850 # Issue 14062: b encoding was tacking on an extra \n.
3851 m = Message()
3852 # This has enough non-ascii that it should always end up b encoded.
3853 m['Subject'] = Header('žluťoučký kůň')
3854 s = BytesIO()
3855 g = email.generator.BytesGenerator(s)
3856 g.flatten(m, linesep='\r\n')
3857 self.assertEqual(
3858 s.getvalue(),
3859 b'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
3860
3861 def test_generator_b_encoding_linesep(self):
3862 # Since this broke in ByteGenerator, test Generator for completeness.
3863 m = Message()
3864 # This has enough non-ascii that it should always end up b encoded.
3865 m['Subject'] = Header('žluťoučký kůň')
3866 s = StringIO()
3867 g = email.generator.Generator(s)
3868 g.flatten(m, linesep='\r\n')
3869 self.assertEqual(
3870 s.getvalue(),
3871 'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
3872
# unittest: None removes the length limit on diffs printed when an
# assertion fails.
maxDiff = None
3874
Ezio Melottib3aedd42010-11-20 19:04:17 +00003875
class BaseTestBytesGeneratorIdempotent:
    # Mixin with the bytes flavour of the _msgobj/_idempotent helpers.
    # The class using it must provide three attributes that are read
    # below: linesep, blinesep and normalize_linesep_regex.

    maxDiff = None

    def _msgobj(self, filename):
        # Read the sample file as bytes, rewrite its line endings to
        # self.blinesep, and return (parsed message, normalized bytes).
        with openfile(filename, 'rb') as fp:
            raw = fp.read()
        normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(normalized), normalized

    def _idempotent(self, msg, data, unixfrom=False):
        # Flatten msg with header wrapping disabled (maxheaderlen=0) and
        # require the output to equal data.
        sink = BytesIO()
        generator = email.generator.BytesGenerator(sink, maxheaderlen=0)
        generator.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertEqual(data, sink.getvalue())
R. David Murray96fd54e2010-10-08 15:55:28 +00003892
3893
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                     TestIdempotent):
    # Runs the TestIdempotent cases through the bytes helpers with LF
    # line endings.

    # Separator handed to BytesGenerator.flatten().
    linesep = '\n'
    # Replacement bytes used when normalizing the sample data.
    blinesep = b'\n'
    # Matches every CRLF so that it can be replaced by blinesep.
    normalize_linesep_regex = re.compile(br'\r\n')
3899
3900
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    # Runs the TestIdempotent cases through the bytes helpers with CRLF
    # line endings.

    # Separator handed to BytesGenerator.flatten().
    linesep = '\r\n'
    # Replacement bytes used when normalizing the sample data.
    blinesep = b'\r\n'
    # Matches only an LF that is not already preceded by CR, so existing
    # CRLF pairs are left alone.
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
3906
Ezio Melottib3aedd42010-11-20 19:04:17 +00003907
class TestBase64(unittest.TestCase):
    # Tests for the helper functions in email.base64mime.

    def test_len(self):
        eq = self.assertEqual
        # header_length() must agree with the length of the actual
        # encoding once the trailing newline is suppressed (eol='').
        eq(base64mime.header_length('hello'),
           len(base64mime.body_encode(b'hello', eol='')))
        # Every started group of three input characters costs four output
        # characters.
        for size in range(15):
            if size == 0:
                bsize = 0
            elif size <= 3:
                bsize = 4
            elif size <= 6:
                bsize = 8
            elif size <= 9:
                bsize = 12
            elif size <= 12:
                bsize = 16
            else:
                bsize = 20
            eq(base64mime.header_length('x' * size), bsize)

    def test_decode(self):
        eq = self.assertEqual
        eq(base64mime.decode(''), b'')
        eq(base64mime.decode('aGVsbG8='), b'hello')

    def test_encode(self):
        eq = self.assertEqual
        # Empty input encodes to an empty result.  Only emptiness is
        # checked: every non-empty result below is a str, so pinning a
        # bytes return type here would be asserting an inconsistency.
        eq(len(base64mime.body_encode(b'')), 0)
        eq(base64mime.body_encode(b'hello'), 'aGVsbG8=\n')
        # A newline in the input is encoded like any other byte.
        eq(base64mime.body_encode(b'hello\n'), 'aGVsbG8K\n')
        # Test the maxlinelen arg
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40), """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IA==
""")
        # Test the eol argument
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IA==\r
""")

    def test_header_encode(self):
        eq = self.assertEqual
        he = base64mime.header_encode
        eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        eq(he('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # Test the charset option
        eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003958
3959
Ezio Melottib3aedd42010-11-20 19:04:17 +00003960
class TestQuopri(unittest.TestCase):
    # Tests for the helper functions in email.quoprimime.

    def setUp(self):
        # Set of characters (as byte integers) that don't need to be encoded
        # in headers.
        self.hlit = list(chain(
            range(ord('a'), ord('z') + 1),
            range(ord('A'), ord('Z') + 1),
            range(ord('0'), ord('9') + 1),
            b'!*+-/'))
        # Set of characters (as byte integers) that do need to be encoded in
        # headers.
        self.hnon = [c for c in range(256) if c not in self.hlit]
        # Use a unittest assertion rather than a bare assert so the sanity
        # check is not stripped when running under -O.
        self.assertEqual(len(self.hlit) + len(self.hnon), 256)
        # Set of characters (as byte integers) that don't need to be encoded
        # in bodies.
        self.blit = list(range(ord(' '), ord('~') + 1))
        self.blit.append(ord('\t'))
        self.blit.remove(ord('='))
        # Set of characters (as byte integers) that do need to be encoded in
        # bodies.
        self.bnon = [c for c in range(256) if c not in self.blit]
        self.assertEqual(len(self.blit) + len(self.bnon), 256)

    def test_quopri_header_check(self):
        for c in self.hlit:
            self.assertFalse(quoprimime.header_check(c),
                        'Should not be header quopri encoded: %s' % chr(c))
        for c in self.hnon:
            self.assertTrue(quoprimime.header_check(c),
                            'Should be header quopri encoded: %s' % chr(c))

    def test_quopri_body_check(self):
        for c in self.blit:
            self.assertFalse(quoprimime.body_check(c),
                        'Should not be body quopri encoded: %s' % chr(c))
        for c in self.bnon:
            self.assertTrue(quoprimime.body_check(c),
                            'Should be body quopri encoded: %s' % chr(c))

    def test_header_quopri_len(self):
        eq = self.assertEqual
        eq(quoprimime.header_length(b'hello'), 5)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
           quoprimime.header_length(b'hello') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
           quoprimime.header_length(b'h@e@l@l@o@') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        for c in self.hlit:
            eq(quoprimime.header_length(bytes([c])), 1,
               'expected length 1 for %r' % chr(c))
        for c in self.hnon:
            # Space is special; it's encoded to _
            if c == ord(' '):
                continue
            eq(quoprimime.header_length(bytes([c])), 3,
               'expected length 3 for %r' % chr(c))
        eq(quoprimime.header_length(b' '), 1)

    def test_body_quopri_len(self):
        eq = self.assertEqual
        for c in self.blit:
            eq(quoprimime.body_length(bytes([c])), 1)
        for c in self.bnon:
            eq(quoprimime.body_length(bytes([c])), 3)

    def test_quote_unquote_idempotent(self):
        for x in range(256):
            c = chr(x)
            self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)

    def _test_header_encode(self, header, expected_encoded_header, charset=None):
        # Helper: charset=None means "use header_encode()'s own default".
        if charset is None:
            encoded_header = quoprimime.header_encode(header)
        else:
            encoded_header = quoprimime.header_encode(header, charset)
        self.assertEqual(encoded_header, expected_encoded_header)

    def test_header_encode_null(self):
        self._test_header_encode(b'', '')

    def test_header_encode_one_word(self):
        self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=')

    def test_header_encode_two_lines(self):
        self._test_header_encode(b'hello\nworld',
                                '=?iso-8859-1?q?hello=0Aworld?=')

    def test_header_encode_non_ascii(self):
        self._test_header_encode(b'hello\xc7there',
                                '=?iso-8859-1?q?hello=C7there?=')

    def test_header_encode_alt_charset(self):
        self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
                charset='iso-8859-2')

    def _test_header_decode(self, encoded_header, expected_decoded_header):
        decoded_header = quoprimime.header_decode(encoded_header)
        self.assertEqual(decoded_header, expected_decoded_header)

    def test_header_decode_null(self):
        self._test_header_decode('', '')

    def test_header_decode_one_word(self):
        self._test_header_decode('hello', 'hello')

    def test_header_decode_two_lines(self):
        self._test_header_decode('hello=0Aworld', 'hello\nworld')

    def test_header_decode_non_ascii(self):
        self._test_header_decode('hello=C7there', 'hello\xc7there')

    def test_header_decode_re_bug_18380(self):
        # Issue 18380: Call re.sub with a positional argument for flags in the wrong position
        self.assertEqual(quoprimime.header_decode('=30' * 257), '0' * 257)

    def _test_decode(self, encoded, expected_decoded, eol=None):
        # Helper: eol=None means "use decode()'s own default".
        if eol is None:
            decoded = quoprimime.decode(encoded)
        else:
            decoded = quoprimime.decode(encoded, eol=eol)
        self.assertEqual(decoded, expected_decoded)

    def test_decode_null_word(self):
        self._test_decode('', '')

    def test_decode_null_line_null_word(self):
        self._test_decode('\r\n', '\n')

    def test_decode_one_word(self):
        self._test_decode('hello', 'hello')

    def test_decode_one_word_eol(self):
        self._test_decode('hello', 'hello', eol='X')

    def test_decode_one_line(self):
        self._test_decode('hello\r\n', 'hello\n')

    def test_decode_one_line_lf(self):
        self._test_decode('hello\n', 'hello\n')

    def test_decode_one_line_cr(self):
        self._test_decode('hello\r', 'hello\n')

    def test_decode_one_line_nl(self):
        self._test_decode('hello\n', 'helloX', eol='X')

    def test_decode_one_line_crnl(self):
        self._test_decode('hello\r\n', 'helloX', eol='X')

    def test_decode_one_line_one_word(self):
        self._test_decode('hello\r\nworld', 'hello\nworld')

    def test_decode_one_line_one_word_eol(self):
        self._test_decode('hello\r\nworld', 'helloXworld', eol='X')

    def test_decode_two_lines(self):
        self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n')

    def test_decode_two_lines_eol(self):
        self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X')

    def test_decode_one_long_line(self):
        self._test_decode('Spam' * 250, 'Spam' * 250)

    def test_decode_one_space(self):
        self._test_decode(' ', '')

    def test_decode_multiple_spaces(self):
        self._test_decode(' ' * 5, '')

    def test_decode_one_line_trailing_spaces(self):
        self._test_decode('hello \r\n', 'hello\n')

    def test_decode_two_lines_trailing_spaces(self):
        self._test_decode('hello \r\nworld \r\n', 'hello\nworld\n')

    def test_decode_quoted_word(self):
        self._test_decode('=22quoted=20words=22', '"quoted words"')

    def test_decode_uppercase_quoting(self):
        self._test_decode('ab=CD=EF', 'ab\xcd\xef')

    def test_decode_lowercase_quoting(self):
        self._test_decode('ab=cd=ef', 'ab\xcd\xef')

    def test_decode_soft_line_break(self):
        self._test_decode('soft line=\r\nbreak', 'soft linebreak')

    def test_decode_false_quoting(self):
        self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2')

    def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
        # Helper: only pass maxlinelen/eol through when the caller supplied
        # them, so that body_encode()'s defaults are exercised otherwise.
        kwargs = {}
        if maxlinelen is None:
            # Use body_encode's default.
            maxlinelen = 76
        else:
            kwargs['maxlinelen'] = maxlinelen
        if eol is None:
            # Use body_encode's default.
            eol = '\n'
        else:
            kwargs['eol'] = eol
        encoded_body = quoprimime.body_encode(body, **kwargs)
        self.assertEqual(encoded_body, expected_encoded_body)
        if eol == '\n' or eol == '\r\n':
            # We know how to split the result back into lines, so maxlinelen
            # can be checked.
            for line in encoded_body.splitlines():
                self.assertLessEqual(len(line), maxlinelen)

    def test_encode_null(self):
        self._test_encode('', '')

    def test_encode_null_lines(self):
        self._test_encode('\n\n', '\n\n')

    def test_encode_one_line(self):
        self._test_encode('hello\n', 'hello\n')

    def test_encode_one_line_crlf(self):
        self._test_encode('hello\r\n', 'hello\n')

    def test_encode_one_line_eol(self):
        self._test_encode('hello\n', 'hello\r\n', eol='\r\n')

    def test_encode_one_space(self):
        self._test_encode(' ', '=20')

    def test_encode_one_line_one_space(self):
        self._test_encode(' \n', '=20\n')

    # XXX: body_encode() expect strings, but uses ord(char) from these strings
    # to index into a 256-entry list.  For code points above 255, this will fail.
    # Should there be a check for 8-bit only ord() values in body, or at least
    # a comment about the expected input?

    def test_encode_two_lines_one_space(self):
        self._test_encode(' \n \n', '=20\n=20\n')

    def test_encode_one_word_trailing_spaces(self):
        # Two trailing spaces: only the last one is quoted, the one before
        # it stays literal.
        self._test_encode('hello  ', 'hello =20')

    def test_encode_one_line_trailing_spaces(self):
        # Same as above, with a line ending after the spaces.
        self._test_encode('hello  \n', 'hello =20\n')

    def test_encode_one_word_trailing_tab(self):
        self._test_encode('hello \t', 'hello =09')

    def test_encode_one_line_trailing_tab(self):
        self._test_encode('hello \t\n', 'hello =09\n')

    def test_encode_trailing_space_before_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6)

    def test_encode_trailing_space_at_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5)

    def test_encode_trailing_space_beyond_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4)

    def test_encode_whitespace_lines(self):
        self._test_encode(' \n' * 5, '=20\n' * 5)

    def test_encode_quoted_equals(self):
        self._test_encode('a = b', 'a =3D b')

    def test_encode_one_long_string(self):
        self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25)

    def test_encode_one_long_line(self):
        self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')

    def test_encode_one_very_long_line(self):
        self._test_encode('x' * 200 + '\n',
                2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n')

    def test_encode_shortest_maxlinelen(self):
        self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4)

    def test_encode_maxlinelen_too_small(self):
        self.assertRaises(ValueError, self._test_encode, '', '', maxlinelen=3)

    def test_encode(self):
        eq = self.assertEqual
        eq(quoprimime.body_encode(''), '')
        eq(quoprimime.body_encode('hello'), 'hello')
        # A CRLF in the input comes out as the default eol.
        eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
        # Test the maxlinelen arg
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
x xxxx xxxx xxxx xxxx=20""")
        # Test the eol argument
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
x xxxx xxxx xxxx xxxx=20""")
        eq(quoprimime.body_encode("""\
one line

two line"""), """\
one line

two line""")
4277
4278
Ezio Melottib3aedd42010-11-20 19:04:17 +00004279
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004280# Test the Charset class
class TestCharset(unittest.TestCase):
    # Header and body encoding as performed through email.charset.Charset.

    def tearDown(self):
        # test_body_encode registers a temporary 'fake' charset; drop it
        # again if it is there, and do nothing otherwise.
        from email import charset as CharsetModule
        CharsetModule.CHARSETS.pop('fake', None)

    def test_codec_encodeable(self):
        # Make sure us-ascii = no Unicode conversion
        ascii_charset = Charset('us-ascii')
        self.assertEqual(ascii_charset.header_encode('Hello World!'),
                         'Hello World!')
        # Test 8-bit idempotency with us-ascii
        eight_bit = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
        self.assertRaises(UnicodeError,
                          ascii_charset.header_encode, eight_bit)
        utf8_charset = Charset('utf-8')
        self.assertEqual(utf8_charset.header_encode(eight_bit),
                         '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')

    def test_body_encode(self):
        check = self.assertEqual
        # Try a charset with QP body encoding
        qp_charset = Charset('iso-8859-1')
        check('hello w=F6rld', qp_charset.body_encode('hello w\xf6rld'))
        # Try a charset with Base64 body encoding
        b64_charset = Charset('utf-8')
        check('aGVsbG8gd29ybGQ=\n', b64_charset.body_encode(b'hello world'))
        # Try a charset with None body encoding
        plain_charset = Charset('us-ascii')
        check('hello world', plain_charset.body_encode('hello world'))
        # Try the convert argument, where input codec != output codec
        jp_charset = Charset('euc-jp')
        # With apologies to Tokio Kikuchi ;)
        # XXX FIXME
##         try:
##             eq('\x1b$B5FCO;~IW\x1b(B',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
##             eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
##         except LookupError:
##             # We probably don't have the Japanese codecs installed
##             pass
        # Testing SF bug #625509, which we have to fake, since there are no
        # built-in encodings where the header encoding is QP but the body
        # encoding is not.
        from email import charset as CharsetModule
        CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8')
        fake_charset = Charset('fake')
        check('hello world', fake_charset.body_encode('hello world'))

    def test_unicode_charset_name(self):
        named = Charset('us-ascii')
        self.assertEqual(str(named), 'us-ascii')
        # A non-ASCII charset name is rejected.
        self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
4335
4336
Ezio Melottib3aedd42010-11-20 19:04:17 +00004337
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004338# Test multilingual MIME headers.
4339class TestHeader(TestEmailBase):
4340 def test_simple(self):
4341 eq = self.ndiffAssertEqual
4342 h = Header('Hello World!')
4343 eq(h.encode(), 'Hello World!')
4344 h.append(' Goodbye World!')
4345 eq(h.encode(), 'Hello World! Goodbye World!')
4346
4347 def test_simple_surprise(self):
4348 eq = self.ndiffAssertEqual
4349 h = Header('Hello World!')
4350 eq(h.encode(), 'Hello World!')
4351 h.append('Goodbye World!')
4352 eq(h.encode(), 'Hello World! Goodbye World!')
4353
4354 def test_header_needs_no_decoding(self):
4355 h = 'no decoding needed'
4356 self.assertEqual(decode_header(h), [(h, None)])
4357
4358 def test_long(self):
4359 h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
4360 maxlinelen=76)
4361 for l in h.encode(splitchars=' ').split('\n '):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00004362 self.assertTrue(len(l) <= 76)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004363
def test_multilingual(self):
    # Build one header from three chunks in three different charsets,
    # then check its encoded form, its decoded chunks, its str() value
    # and a make_header() round trip.
    eq = self.ndiffAssertEqual
    g = Charset("iso-8859-1")
    cz = Charset("iso-8859-2")
    utf8 = Charset("utf-8")
    # The first two chunks are given as bytes, the third as str.
    g_head = (b'Die Mieter treten hier ein werden mit einem '
              b'Foerderband komfortabel den Korridor entlang, '
              b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
              b'gegen die rotierenden Klingen bef\xf6rdert. ')
    cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
               b'd\xf9vtipu.. ')
    utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                 '\u3044\u307e\u3059\u3002')
    h = Header(g_head, g)
    h.append(cz_head, cz)
    h.append(utf8_head, utf8)
    enc = h.encode(maxlinelen=76)
    # Expected encoding: q-encoded words for the two iso-8859 chunks and
    # b-encoded words for the utf-8 chunk.
    eq(enc, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
    # Decoding must give back exactly the three original chunks, each as
    # a (bytes, charset name) pair.
    decoded = decode_header(enc)
    eq(len(decoded), 3)
    eq(decoded[0], (g_head, 'iso-8859-1'))
    eq(decoded[1], (cz_head, 'iso-8859-2'))
    eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
    # str() of the header is the concatenated text; the expected value is
    # spelled as UTF-8 bytes and decoded for the comparison.
    ustr = str(h)
    eq(ustr,
       (b'Die Mieter treten hier ein werden mit einem Foerderband '
        b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
        b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
        b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
        b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
        b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
        b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
        b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
        b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
        b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
        b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
        b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
        b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
        b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
        b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
        b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
        ).decode('utf-8'))
    # Test make_header()
    newh = make_header(decode_header(enc))
    eq(newh, h)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004426
4427 def test_empty_header_encode(self):
4428 h = Header()
4429 self.assertEqual(h.encode(), '')
Barry Warsaw8b3d6592007-08-30 02:10:49 +00004430
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004431 def test_header_ctor_default_args(self):
4432 eq = self.ndiffAssertEqual
4433 h = Header()
4434 eq(h, '')
4435 h.append('foo', Charset('iso-8859-1'))
Guido van Rossum9604e662007-08-30 03:46:43 +00004436 eq(h, 'foo')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004437
4438 def test_explicit_maxlinelen(self):
4439 eq = self.ndiffAssertEqual
4440 hstr = ('A very long line that must get split to something other '
4441 'than at the 76th character boundary to test the non-default '
4442 'behavior')
4443 h = Header(hstr)
4444 eq(h.encode(), '''\
4445A very long line that must get split to something other than at the 76th
4446 character boundary to test the non-default behavior''')
4447 eq(str(h), hstr)
4448 h = Header(hstr, header_name='Subject')
4449 eq(h.encode(), '''\
4450A very long line that must get split to something other than at the
4451 76th character boundary to test the non-default behavior''')
4452 eq(str(h), hstr)
4453 h = Header(hstr, maxlinelen=1024, header_name='Subject')
4454 eq(h.encode(), hstr)
4455 eq(str(h), hstr)
4456
Guido van Rossum9604e662007-08-30 03:46:43 +00004457 def test_quopri_splittable(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004458 eq = self.ndiffAssertEqual
4459 h = Header(charset='iso-8859-1', maxlinelen=20)
Guido van Rossum9604e662007-08-30 03:46:43 +00004460 x = 'xxxx ' * 20
4461 h.append(x)
4462 s = h.encode()
4463 eq(s, """\
4464=?iso-8859-1?q?xxx?=
4465 =?iso-8859-1?q?x_?=
4466 =?iso-8859-1?q?xx?=
4467 =?iso-8859-1?q?xx?=
4468 =?iso-8859-1?q?_x?=
4469 =?iso-8859-1?q?xx?=
4470 =?iso-8859-1?q?x_?=
4471 =?iso-8859-1?q?xx?=
4472 =?iso-8859-1?q?xx?=
4473 =?iso-8859-1?q?_x?=
4474 =?iso-8859-1?q?xx?=
4475 =?iso-8859-1?q?x_?=
4476 =?iso-8859-1?q?xx?=
4477 =?iso-8859-1?q?xx?=
4478 =?iso-8859-1?q?_x?=
4479 =?iso-8859-1?q?xx?=
4480 =?iso-8859-1?q?x_?=
4481 =?iso-8859-1?q?xx?=
4482 =?iso-8859-1?q?xx?=
4483 =?iso-8859-1?q?_x?=
4484 =?iso-8859-1?q?xx?=
4485 =?iso-8859-1?q?x_?=
4486 =?iso-8859-1?q?xx?=
4487 =?iso-8859-1?q?xx?=
4488 =?iso-8859-1?q?_x?=
4489 =?iso-8859-1?q?xx?=
4490 =?iso-8859-1?q?x_?=
4491 =?iso-8859-1?q?xx?=
4492 =?iso-8859-1?q?xx?=
4493 =?iso-8859-1?q?_x?=
4494 =?iso-8859-1?q?xx?=
4495 =?iso-8859-1?q?x_?=
4496 =?iso-8859-1?q?xx?=
4497 =?iso-8859-1?q?xx?=
4498 =?iso-8859-1?q?_x?=
4499 =?iso-8859-1?q?xx?=
4500 =?iso-8859-1?q?x_?=
4501 =?iso-8859-1?q?xx?=
4502 =?iso-8859-1?q?xx?=
4503 =?iso-8859-1?q?_x?=
4504 =?iso-8859-1?q?xx?=
4505 =?iso-8859-1?q?x_?=
4506 =?iso-8859-1?q?xx?=
4507 =?iso-8859-1?q?xx?=
4508 =?iso-8859-1?q?_x?=
4509 =?iso-8859-1?q?xx?=
4510 =?iso-8859-1?q?x_?=
4511 =?iso-8859-1?q?xx?=
4512 =?iso-8859-1?q?xx?=
4513 =?iso-8859-1?q?_?=""")
4514 eq(x, str(make_header(decode_header(s))))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004515 h = Header(charset='iso-8859-1', maxlinelen=40)
4516 h.append('xxxx ' * 20)
Guido van Rossum9604e662007-08-30 03:46:43 +00004517 s = h.encode()
4518 eq(s, """\
4519=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
4520 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
4521 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
4522 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
4523 =?iso-8859-1?q?_xxxx_xxxx_?=""")
4524 eq(x, str(make_header(decode_header(s))))
4525
4526 def test_base64_splittable(self):
4527 eq = self.ndiffAssertEqual
4528 h = Header(charset='koi8-r', maxlinelen=20)
4529 x = 'xxxx ' * 20
4530 h.append(x)
4531 s = h.encode()
4532 eq(s, """\
4533=?koi8-r?b?eHh4?=
4534 =?koi8-r?b?eCB4?=
4535 =?koi8-r?b?eHh4?=
4536 =?koi8-r?b?IHh4?=
4537 =?koi8-r?b?eHgg?=
4538 =?koi8-r?b?eHh4?=
4539 =?koi8-r?b?eCB4?=
4540 =?koi8-r?b?eHh4?=
4541 =?koi8-r?b?IHh4?=
4542 =?koi8-r?b?eHgg?=
4543 =?koi8-r?b?eHh4?=
4544 =?koi8-r?b?eCB4?=
4545 =?koi8-r?b?eHh4?=
4546 =?koi8-r?b?IHh4?=
4547 =?koi8-r?b?eHgg?=
4548 =?koi8-r?b?eHh4?=
4549 =?koi8-r?b?eCB4?=
4550 =?koi8-r?b?eHh4?=
4551 =?koi8-r?b?IHh4?=
4552 =?koi8-r?b?eHgg?=
4553 =?koi8-r?b?eHh4?=
4554 =?koi8-r?b?eCB4?=
4555 =?koi8-r?b?eHh4?=
4556 =?koi8-r?b?IHh4?=
4557 =?koi8-r?b?eHgg?=
4558 =?koi8-r?b?eHh4?=
4559 =?koi8-r?b?eCB4?=
4560 =?koi8-r?b?eHh4?=
4561 =?koi8-r?b?IHh4?=
4562 =?koi8-r?b?eHgg?=
4563 =?koi8-r?b?eHh4?=
4564 =?koi8-r?b?eCB4?=
4565 =?koi8-r?b?eHh4?=
4566 =?koi8-r?b?IA==?=""")
4567 eq(x, str(make_header(decode_header(s))))
4568 h = Header(charset='koi8-r', maxlinelen=40)
4569 h.append(x)
4570 s = h.encode()
4571 eq(s, """\
4572=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
4573 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
4574 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
4575 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
4576 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
4577 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
4578 eq(x, str(make_header(decode_header(s))))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004579
4580 def test_us_ascii_header(self):
4581 eq = self.assertEqual
4582 s = 'hello'
4583 x = decode_header(s)
4584 eq(x, [('hello', None)])
4585 h = make_header(x)
4586 eq(s, h.encode())
4587
4588 def test_string_charset(self):
4589 eq = self.assertEqual
4590 h = Header()
4591 h.append('hello', 'iso-8859-1')
Guido van Rossum9604e662007-08-30 03:46:43 +00004592 eq(h, 'hello')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004593
4594## def test_unicode_error(self):
4595## raises = self.assertRaises
4596## raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
4597## raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
4598## h = Header()
4599## raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
4600## raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
4601## raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')
4602
4603 def test_utf8_shortest(self):
4604 eq = self.assertEqual
4605 h = Header('p\xf6stal', 'utf-8')
4606 eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
4607 h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
4608 eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')
4609
4610 def test_bad_8bit_header(self):
4611 raises = self.assertRaises
4612 eq = self.assertEqual
4613 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4614 raises(UnicodeError, Header, x)
4615 h = Header()
4616 raises(UnicodeError, h.append, x)
4617 e = x.decode('utf-8', 'replace')
4618 eq(str(Header(x, errors='replace')), e)
4619 h.append(x, errors='replace')
4620 eq(str(h), e)
4621
R David Murray041015c2011-03-25 15:10:55 -04004622 def test_escaped_8bit_header(self):
4623 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
R David Murray6bdb1762011-06-18 12:30:55 -04004624 e = x.decode('ascii', 'surrogateescape')
4625 h = Header(e, charset=email.charset.UNKNOWN8BIT)
R David Murray041015c2011-03-25 15:10:55 -04004626 self.assertEqual(str(h),
4627 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
4628 self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])
4629
R David Murraye5e366c2011-06-18 12:57:28 -04004630 def test_header_handles_binary_unknown8bit(self):
4631 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4632 h = Header(x, charset=email.charset.UNKNOWN8BIT)
4633 self.assertEqual(str(h),
4634 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
4635 self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])
4636
4637 def test_make_header_handles_binary_unknown8bit(self):
4638 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4639 h = Header(x, charset=email.charset.UNKNOWN8BIT)
4640 h2 = email.header.make_header(email.header.decode_header(h))
4641 self.assertEqual(str(h2),
4642 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
4643 self.assertEqual(email.header.decode_header(h2), [(x, 'unknown-8bit')])
4644
R David Murray041015c2011-03-25 15:10:55 -04004645 def test_modify_returned_list_does_not_change_header(self):
4646 h = Header('test')
4647 chunks = email.header.decode_header(h)
4648 chunks.append(('ascii', 'test2'))
4649 self.assertEqual(str(h), 'test')
4650
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004651 def test_encoded_adjacent_nonencoded(self):
4652 eq = self.assertEqual
4653 h = Header()
4654 h.append('hello', 'iso-8859-1')
4655 h.append('world')
4656 s = h.encode()
4657 eq(s, '=?iso-8859-1?q?hello?= world')
4658 h = make_header(decode_header(s))
4659 eq(h.encode(), s)
4660
R David Murray07ea53c2012-06-02 17:56:49 -04004661 def test_whitespace_keeper(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004662 eq = self.assertEqual
4663 s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
4664 parts = decode_header(s)
R David Murray07ea53c2012-06-02 17:56:49 -04004665 eq(parts, [(b'Subject: ', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b' zz.', None)])
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004666 hdr = make_header(parts)
4667 eq(hdr.encode(),
4668 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')
4669
4670 def test_broken_base64_header(self):
4671 raises = self.assertRaises
R. David Murrayc4e69cc2010-08-03 22:14:10 +00004672 s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004673 raises(errors.HeaderParseError, decode_header, s)
4674
R. David Murray477efb32011-01-05 01:39:32 +00004675 def test_shift_jis_charset(self):
4676 h = Header('文', charset='shift_jis')
4677 self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')
4678
R David Murrayde912762011-03-16 18:26:23 -04004679 def test_flatten_header_with_no_value(self):
4680 # Issue 11401 (regression from email 4.x) Note that the space after
4681 # the header doesn't reflect the input, but this is also the way
4682 # email 4.x behaved. At some point it would be nice to fix that.
4683 msg = email.message_from_string("EmptyHeader:")
4684 self.assertEqual(str(msg), "EmptyHeader: \n\n")
4685
R David Murray01581ee2011-04-18 10:04:34 -04004686 def test_encode_preserves_leading_ws_on_value(self):
4687 msg = Message()
4688 msg['SomeHeader'] = ' value with leading ws'
4689 self.assertEqual(str(msg), "SomeHeader: value with leading ws\n\n")
4690
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004691
Ezio Melottib3aedd42010-11-20 19:04:17 +00004692
# Test encoding and decoding of RFC 2231 header parameters
class TestRFC2231(TestEmailBase):
    # Tests for RFC 2231 parameter handling through the Message API:
    # get_param(), set_param(), del_param(), get_filename(),
    # get_boundary() and get_content_charset().
    #
    # The "test_headerregistry...." comments above individual tests are
    # kept from the original file; presumably they name the related tests
    # in test_headerregistry.

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes
    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes
    def test_get_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_29.txt')
        eq(msg.get_param('title'),
           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        # With unquote=False the surrounding double quotes are kept.
        eq(msg.get_param('title', unquote=False),
           ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))

    # NOTE(review): the expected message text in test_set_param and
    # test_del_param is compared against msg_01.txt after modification, so
    # it must match that file character for character, including runs of
    # spaces -- confirm against the data file.
    def test_set_param(self):
        eq = self.ndiffAssertEqual
        title = 'This is even more ***fun*** isn\'t it!'
        msg = Message()
        # Without a language the middle element of the tuple is ''.
        msg.set_param('title', title, charset='us-ascii')
        eq(msg.get_param('title'), ('us-ascii', '', title))
        msg.set_param('title', title, charset='us-ascii', language='en')
        eq(msg.get_param('title'), ('us-ascii', 'en', title))
        msg = self._msgobj('msg_01.txt')
        msg.set_param('title', title, charset='us-ascii', language='en')
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    def test_set_param_requote(self):
        msg = Message()
        msg.set_param('title', 'foo')
        self.assertEqual(msg['content-type'], 'text/plain; title="foo"')
        msg.set_param('title', 'bar', requote=False)
        self.assertEqual(msg['content-type'], 'text/plain; title=bar')
        # tspecial is still quoted.
        msg.set_param('title', "(bar)bell", requote=False)
        self.assertEqual(msg['content-type'], 'text/plain; title="(bar)bell"')

    def test_del_param(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        msg.set_param('foo', 'bar', charset='us-ascii', language='en')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        # Only 'foo' is removed; 'title' must still be present below.
        msg.del_param('foo', header='Content-Type')
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_charset
    # I changed the charset name, though, because the one in the file isn't
    # a legal charset name.  Should add a test for an illegal charset.
    def test_rfc2231_get_content_charset(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_32.txt')
        eq(msg.get_content_charset(), 'us-ascii')

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_double_quotes
    def test_rfc2231_parse_rfc_quoting(self):
        m = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*=''This%20is%20even%20more%20;
            \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        # The message must also flatten back to the exact input text.
        self.assertEqual(m, msg.as_string())

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes
    def test_rfc2231_parse_extra_quoting(self):
        m = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*="''This%20is%20even%20more%20";
            \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        # The message must also flatten back to the exact input text.
        self.assertEqual(m, msg.as_string())

    # test_headerregistry.TestContentTypeHeader.rfc2231_no_language_or_charset
    # but new test uses *0* because otherwise lang/charset is not valid.
    # test_headerregistry.TestContentTypeHeader.rfc2231_segmented_normal_values
    def test_rfc2231_no_language_or_charset(self):
        m = '''\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm

'''
        msg = email.message_from_string(m)
        param = msg.get_param('NAME')
        # Non-extended segments give a plain value, not a 3-tuple.
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(
            param,
            'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_charset
    def test_rfc2231_no_language_or_charset_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    # Duplicate of previous test?
    def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    # test_headerregistry.TestContentTypeHeader.rfc2231_partly_encoded,
    # but the test below is wrong (the first part should be decoded).
    def test_rfc2231_partly_encoded(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20***fun*** is it not.pdf')

    def test_rfc2231_partly_nonencoded(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0="This%20is%20even%20more%20";
\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        # None of the segments is marked as encoded, so the percent
        # escapes are expected to come back verbatim.
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_boundary(self):
        m = '''\
Content-Type: multipart/alternative;
\tboundary*0*="''This%20is%20even%20more%20";
\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tboundary*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_boundary(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_charset(self):
        # This is a nonsensical charset value, but tests the code anyway
        m = '''\
Content-Type: text/plain;
\tcharset*0*="This%20is%20even%20more%20";
\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tcharset*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_content_charset(),
                         'this is even more ***fun*** is it not.pdf')

    # test_headerregistry.TestContentTypeHeader.rfc2231_unknown_charset_treated_as_ascii
    def test_rfc2231_bad_encoding_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_charset(self):
        m = """\
Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D

"""
        msg = email.message_from_string(m)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertIsNone(msg.get_content_charset())

    def test_rfc2231_bad_character_in_charset(self):
        m = """\
Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D

"""
        msg = email.message_from_string(m)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertIsNone(msg.get_content_charset())

    def test_rfc2231_bad_character_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2*="is it not.pdf%E2"

'''
        msg = email.message_from_string(m)
        # The trailing %E2 is expected to come back as U+FFFD.
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf\ufffd')

    def test_rfc2231_unknown_encoding(self):
        m = """\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt

"""
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(), 'myfile.txt')

    def test_rfc2231_single_tick_in_filename_extended(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        self.assertIsNone(charset)
        self.assertIsNone(language)
        eq(s, "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes
    def test_rfc2231_single_tick_in_filename(self):
        m = """\
Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(m)
        param = msg.get_param('name')
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(param, "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_value_with_charset_and_lang
    def test_rfc2231_tick_attack_extended(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_non_encoded_value
    def test_rfc2231_tick_attack(self):
        m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(m)
        param = msg.get_param('name')
        # The segments are not marked as encoded, so the charset'lang'
        # prefix is expected to stay part of the plain value.
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(param, "us-ascii'en-us'Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quotes_inside_quotes
    def test_rfc2231_no_extended_values(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo; name=\"Frank's Document\"

"""
        msg = email.message_from_string(m)
        eq(msg.get_param('name'), "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_then_unencoded_segments
    def test_rfc2231_encoded_then_unencoded_segments(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'My\";
\tname*1=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, 'My Document For You')

    # test_headerregistry.TestContentTypeHeader.rfc2231_unencoded_then_encoded_segments
    # test_headerregistry.TestContentTypeHeader.rfc2231_quoted_unencoded_then_encoded_segments
    def test_rfc2231_unencoded_then_encoded_segments(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'My\";
\tname*1*=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, 'My Document For You')
5061
5062
Ezio Melottib3aedd42010-11-20 19:04:17 +00005063
# Tests to ensure that signed parts of an email are completely preserved, as
# required by RFC 1847 section 2.1.  Note that these tests are incomplete,
# because the email package does not currently always preserve the body.
# See issue 1670765.
class TestSigned(TestEmailBase):

    # Captures, as group 2, the text between a "--<boundary>" line and the
    # next line that consists of exactly the same "--<boundary>".
    _first_part_re = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)

    def _msg_and_obj(self, filename):
        # Return the raw text of the test data file *filename* together
        # with the Message object parsed from that text.
        with openfile(filename) as fp:
            original = fp.read()
        msg = email.message_from_string(original)
        return original, msg

    def _first_part(self, text):
        # Return the content of the first MIME part found in *text*.
        # Fail the test explicitly when nothing matches, instead of
        # letting .group() blow up on None with an AttributeError.
        found = self._first_part_re.search(text)
        self.assertIsNotNone(found, 'no MIME part found in message text')
        return found.group(2)

    def _signed_parts_eq(self, original, result):
        # Extract the first mime part of each message and require the
        # regenerated one to equal the one from the original text.
        inpart = self._first_part(original)
        outpart = self._first_part(result)
        self.assertEqual(outpart, inpart)

    def test_long_headers_as_string(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        # Same check, but with header wrapping requested at 60 columns.
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        # Same check, going through Generator.flatten() directly.
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)
5099
5100
Ezio Melottib3aedd42010-11-20 19:04:17 +00005101
# Run the tests through unittest when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()