blob: 8757924522717d5a2817e0871223e0a098c7e319 [file] [log] [blame]
# Copyright (C) 2001-2010 Python Software Foundation
# Contact: email-sig@python.org
# email package unit tests
4
R. David Murray719a4492010-11-21 16:53:48 +00005import re
Guido van Rossum8b3febe2007-08-30 01:15:14 +00006import time
7import base64
Guido van Rossum8b3febe2007-08-30 01:15:14 +00008import unittest
R. David Murray96fd54e2010-10-08 15:55:28 +00009import textwrap
Guido van Rossum8b3febe2007-08-30 01:15:14 +000010
R. David Murray96fd54e2010-10-08 15:55:28 +000011from io import StringIO, BytesIO
Guido van Rossum8b3febe2007-08-30 01:15:14 +000012from itertools import chain
13
14import email
R David Murrayc27e5222012-05-25 15:01:48 -040015import email.policy
Guido van Rossum8b3febe2007-08-30 01:15:14 +000016
17from email.charset import Charset
18from email.header import Header, decode_header, make_header
19from email.parser import Parser, HeaderParser
R David Murray638d40b2012-08-24 11:14:13 -040020from email.generator import Generator, DecodedGenerator, BytesGenerator
Guido van Rossum8b3febe2007-08-30 01:15:14 +000021from email.message import Message
22from email.mime.application import MIMEApplication
23from email.mime.audio import MIMEAudio
24from email.mime.text import MIMEText
25from email.mime.image import MIMEImage
26from email.mime.base import MIMEBase
27from email.mime.message import MIMEMessage
28from email.mime.multipart import MIMEMultipart
29from email import utils
30from email import errors
31from email import encoders
32from email import iterators
33from email import base64mime
34from email import quoprimime
35
R David Murray965794e2013-03-07 18:16:47 -050036from test.support import unlink
R David Murraya256bac2011-03-31 12:20:23 -040037from test.test_email import openfile, TestEmailBase
Guido van Rossum8b3febe2007-08-30 01:15:14 +000038
# These imports are documented to work, but we are testing them using a
# different path, so we import them here just to make sure they are importable.
41from email.parser import FeedParser, BytesFeedParser
42
# Small string constants for the tests below; NL is used to re-join
# split message text (the other two are presumably used further down).
NL = '\n'
EMPTYSTRING = ''
SPACE = ' '
46
47
# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    """Tests for the public API of the ``Message`` class."""

    def test_get_all(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        eq(msg.get_charset(), None)
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        eq(msg.get_charset(), None)
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_get_charsets(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_make_boundary(self):
        msg = MIMEMultipart('form-data')
        # Note that when the boundary gets created is an implementation
        # detail and might change.
        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
        # Trigger creation of boundary
        msg.as_string()
        self.assertEqual(msg.items()[0][1][:33],
                         'multipart/form-data; boundary="==')
        # XXX: there ought to be tests of the uniqueness of the boundary, too.

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        with openfile('msg_46.txt') as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, True, 0)
        gen.flatten(msg, False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_byte_message_rfc822_only(self):
        # Make sure new bytes header parser also passes this.
        with openfile('msg_46.txt', 'rb') as fp:
            msgdata = fp.read()
        parser = email.parser.BytesHeaderParser()
        msg = parser.parsebytes(msgdata)
        out = BytesIO()
        gen = email.generator.BytesGenerator(out)
        gen.flatten(msg)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        eq(msg.get_payload(decode=True), None)
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            # Assigning a header appends rather than replaces, and lookups
            # return the first match, so drop the previous value first;
            # otherwise only 'x-uuencode' would ever be exercised.
            del msg['content-transfer-encoding']
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_get_payload_n_raises_on_non_multipart(self):
        msg = Message()
        self.assertRaises(TypeError, msg.get_payload, 1)

    def test_decoded_generator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        self.assertIn('from', msg)
        self.assertIn('From', msg)
        self.assertIn('FROM', msg)
        self.assertIn('to', msg)
        self.assertIn('To', msg)
        self.assertIn('TO', msg)

    def test_as_string(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        eq(text, str(msg))
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        self.assertTrue(lines[0].startswith('From '))
        eq(text, NL.join(lines[1:]))

    # test_headerregistry.TestContentTypeHeader.bad_params
    def test_bad_param(self):
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_filename(), None)

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_boundary(), None)

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        eq(msg.get_params(), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    # test_headerregistry.TestContentTypeHeader.spaces_around_param_equals
    def test_get_param_liberal(self):
        msg = Message()
        msg['Content-Type'] = ('Content-Type: Multipart/mixed; '
                               'boundary = "CPIMSSMTPC06p5f3tG"')
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        eq(msg.get_param('quuz', header='x-header'), None)
        eq(msg.get_param('quuz'), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    # test_headerregistry.TestContentTypeHeader.spaces_around_semis
    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    # test_headerregistry.TestContentTypeHeader.semis_inside_quotes
    def test_get_param_with_semis_in_quotes(self):
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    # test_headerregistry.TestContentTypeHeader.quotes_inside_rfc2231_value
    def test_get_param_with_quotes(self):
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        msg = email.message_from_string('Header: exists')
        # Header lookup via ``in`` is case-insensitive.
        self.assertIn('header', msg)
        self.assertIn('Header', msg)
        self.assertIn('HEADER', msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_del_param_on_nonexistent_header(self):
        msg = Message()
        # Must not raise, and must not create the header as a side effect.
        msg.del_param('filename', 'content-disposition')
        self.assertIsNone(msg['content-disposition'])

    def test_del_nonexistent_param(self):
        msg = Message()
        msg.add_header('Content-Type', 'text/plain', charset='utf-8')
        existing_header = msg['Content-Type']
        msg.del_param('foobar', header='Content-Type')
        # Deleting a parameter that is not there leaves the header untouched.
        self.assertEqual(msg['Content-Type'], existing_header)
        self.assertEqual(msg['Content-Type'], 'text/plain; charset="utf-8"')

    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    # test_defect_handling:test_invalid_chars_in_base64_payload
    def test_broken_base64_payload(self):
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         (b'\x03\x00\xe9\xd0\xfe\xff\xff.\x8b\xc0'
                          b'\xa1\x00p\xf6\xbf\xe9\x0f'))
        self.assertIsInstance(msg.defects[0],
                              errors.InvalidBase64CharactersDefect)

    def test_broken_unicode_payload(self):
        # This test improves coverage but is not a compliance test.
        # The behavior in this situation is currently undefined by the API.
        x = 'this is a br\xf6ken thing to do'
        msg = Message()
        msg['content-type'] = 'text/plain'
        msg['content-transfer-encoding'] = '8bit'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    def test_questionable_bytes_payload(self):
        # This test improves coverage but is not a compliance test,
        # since it involves poking inside the black box.
        x = 'this is a quéstionable thing to do'.encode('utf-8')
        msg = Message()
        msg['content-type'] = 'text/plain; charset="utf-8"'
        msg['content-transfer-encoding'] = '8bit'
        msg._payload = x
        self.assertEqual(msg.get_payload(decode=True), x)

    # Issue 1078919
    def test_ascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    def test_noascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer.ppt")
        self.assertEqual(
            'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
            msg['Content-Disposition'])

    def test_nonascii_add_header_via_triple(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fußballer.ppt'))
        self.assertEqual(
            'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
            msg['Content-Disposition'])

    def test_ascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="windows [filename].ppt")
        self.assertEqual(
            'attachment; filename="windows [filename].ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer [filename].ppt")
        self.assertEqual(
            "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
            msg['Content-Disposition'])

    def test_add_header_with_name_only_param(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'inline', foo_bar=None)
        self.assertEqual("inline; foo-bar", msg['Content-Disposition'])

    def test_add_header_with_no_value(self):
        msg = Message()
        msg.add_header('X-Status', None)
        self.assertEqual('', msg['X-Status'])

    # Issue 5871: reject an attempt to embed a header inside a header value
    # (header injection attack).
    def test_embeded_header_via_Header_rejected(self):
        msg = Message()
        msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_embeded_header_via_string_rejected(self):
        msg = Message()
        msg['Dummy'] = 'dummy\nX-Injected-Header: test'
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_unicode_header_defaults_to_utf8_encoding(self):
        # Issue 14291
        m = MIMEText('abc\n')
        m['Subject'] = 'É test'
        self.assertEqual(str(m), textwrap.dedent("""\
            Content-Type: text/plain; charset="us-ascii"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 7bit
            Subject: =?utf-8?q?=C3=89_test?=

            abc
            """))

    def test_unicode_body_defaults_to_utf8_encoding(self):
        # Issue 14291
        m = MIMEText('É testabc\n')
        self.assertEqual(str(m), textwrap.dedent("""\
            Content-Type: text/plain; charset="utf-8"
            MIME-Version: 1.0
            Content-Transfer-Encoding: base64

            w4kgdGVzdGFiYwo=
            """))
640
641
# Test the email.encoders module
class TestEncoders(unittest.TestCase):
    """Tests for content-transfer-encoding selection and encoded output."""

    def test_EncodersEncode_base64(self):
        with openfile('PyBanner048.gif', 'rb') as fp:
            bindata = fp.read()
        mimed = MIMEImage(bindata)
        base64ed = mimed.get_payload()
        # the transfer-encoded body lines should all be <=76 characters
        lines = base64ed.split('\n')
        self.assertLessEqual(max(map(len, lines)), 76)

    def test_encode_empty_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg['content-transfer-encoding'], '7bit')

    def test_default_cte(self):
        eq = self.assertEqual
        # 7bit data and the default us-ascii _charset
        msg = MIMEText('hello world')
        eq(msg['content-transfer-encoding'], '7bit')
        # Similar, but with 8bit data
        msg = MIMEText('hello \xf8 world')
        eq(msg['content-transfer-encoding'], 'base64')
        # And now with a different charset
        msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')

    def test_encode7or8bit(self):
        # Make sure a charset whose input character set is 8bit but
        # whose output character set is 7bit gets a transfer-encoding
        # of 7bit.
        eq = self.assertEqual
        msg = MIMEText('文', _charset='euc-jp')
        eq(msg['content-transfer-encoding'], '7bit')

    def test_qp_encode_latin1(self):
        msg = MIMEText('\xe1\xf6\n', 'text', 'ISO-8859-1')
        self.assertEqual(str(msg), textwrap.dedent("""\
            MIME-Version: 1.0
            Content-Type: text/text; charset="iso-8859-1"
            Content-Transfer-Encoding: quoted-printable

            =E1=F6
            """))

    def test_qp_encode_non_latin1(self):
        # Issue 16948
        msg = MIMEText('\u017c\n', 'text', 'ISO-8859-2')
        self.assertEqual(str(msg), textwrap.dedent("""\
            MIME-Version: 1.0
            Content-Type: text/text; charset="iso-8859-2"
            Content-Transfer-Encoding: quoted-printable

            =BF
            """))
700
Ezio Melottib3aedd42010-11-20 19:04:17 +0000701
# Test long header wrapping
703class TestLongHeaders(TestEmailBase):
R David Murray01581ee2011-04-18 10:04:34 -0400704
705 maxDiff = None
706
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000707 def test_split_long_continuation(self):
708 eq = self.ndiffAssertEqual
709 msg = email.message_from_string("""\
710Subject: bug demonstration
711\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
712\tmore text
713
714test
715""")
716 sfp = StringIO()
717 g = Generator(sfp)
718 g.flatten(msg)
719 eq(sfp.getvalue(), """\
720Subject: bug demonstration
721\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
722\tmore text
723
724test
725""")
726
727 def test_another_long_almost_unsplittable_header(self):
728 eq = self.ndiffAssertEqual
729 hstr = """\
730bug demonstration
731\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
732\tmore text"""
733 h = Header(hstr, continuation_ws='\t')
734 eq(h.encode(), """\
735bug demonstration
736\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
737\tmore text""")
738 h = Header(hstr.replace('\t', ' '))
739 eq(h.encode(), """\
740bug demonstration
741 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
742 more text""")
743
744 def test_long_nonstring(self):
745 eq = self.ndiffAssertEqual
746 g = Charset("iso-8859-1")
747 cz = Charset("iso-8859-2")
748 utf8 = Charset("utf-8")
749 g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
750 b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
751 b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
752 b'bef\xf6rdert. ')
753 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
754 b'd\xf9vtipu.. ')
755 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
756 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
757 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
758 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
759 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
760 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
761 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
762 '\u3044\u307e\u3059\u3002')
763 h = Header(g_head, g, header_name='Subject')
764 h.append(cz_head, cz)
765 h.append(utf8_head, utf8)
766 msg = Message()
767 msg['Subject'] = h
768 sfp = StringIO()
769 g = Generator(sfp)
770 g.flatten(msg)
771 eq(sfp.getvalue(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000772Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
773 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
774 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
775 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
776 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
777 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
778 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
779 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
780 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
781 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
782 =?utf-8?b?44CC?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000783
784""")
Guido van Rossum9604e662007-08-30 03:46:43 +0000785 eq(h.encode(maxlinelen=76), """\
786=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
787 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
788 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
789 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
790 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
791 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
792 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
793 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
794 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
795 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
796 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000797
798 def test_long_header_encode(self):
799 eq = self.ndiffAssertEqual
800 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
801 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
802 header_name='X-Foobar-Spoink-Defrobnit')
803 eq(h.encode(), '''\
804wasnipoop; giraffes="very-long-necked-animals";
805 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
806
807 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
808 eq = self.ndiffAssertEqual
809 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
810 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
811 header_name='X-Foobar-Spoink-Defrobnit',
812 continuation_ws='\t')
813 eq(h.encode(), '''\
814wasnipoop; giraffes="very-long-necked-animals";
815 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
816
817 def test_long_header_encode_with_tab_continuation(self):
818 eq = self.ndiffAssertEqual
819 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
820 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
821 header_name='X-Foobar-Spoink-Defrobnit',
822 continuation_ws='\t')
823 eq(h.encode(), '''\
824wasnipoop; giraffes="very-long-necked-animals";
825\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
826
R David Murray3a6152f2011-03-14 21:13:03 -0400827 def test_header_encode_with_different_output_charset(self):
828 h = Header('文', 'euc-jp')
829 self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=")
830
831 def test_long_header_encode_with_different_output_charset(self):
832 h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4'
833 b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4'
834 b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4'
835 b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp')
836 res = """\
837=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?=
838 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?="""
839 self.assertEqual(h.encode(), res)
840
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000841 def test_header_splitter(self):
842 eq = self.ndiffAssertEqual
843 msg = MIMEText('')
844 # It'd be great if we could use add_header() here, but that doesn't
845 # guarantee an order of the parameters.
846 msg['X-Foobar-Spoink-Defrobnit'] = (
847 'wasnipoop; giraffes="very-long-necked-animals"; '
848 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
849 sfp = StringIO()
850 g = Generator(sfp)
851 g.flatten(msg)
852 eq(sfp.getvalue(), '''\
853Content-Type: text/plain; charset="us-ascii"
854MIME-Version: 1.0
855Content-Transfer-Encoding: 7bit
856X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
857 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
858
859''')
860
861 def test_no_semis_header_splitter(self):
862 eq = self.ndiffAssertEqual
863 msg = Message()
864 msg['From'] = 'test@dom.ain'
865 msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
866 msg.set_payload('Test')
867 sfp = StringIO()
868 g = Generator(sfp)
869 g.flatten(msg)
870 eq(sfp.getvalue(), """\
871From: test@dom.ain
872References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
873 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>
874
875Test""")
876
R David Murray7da4db12011-04-07 20:37:17 -0400877 def test_last_split_chunk_does_not_fit(self):
878 eq = self.ndiffAssertEqual
879 h = Header('Subject: the first part of this is short, but_the_second'
880 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
881 '_all_by_itself')
882 eq(h.encode(), """\
883Subject: the first part of this is short,
884 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
885
886 def test_splittable_leading_char_followed_by_overlong_unsplitable(self):
887 eq = self.ndiffAssertEqual
888 h = Header(', but_the_second'
889 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
890 '_all_by_itself')
891 eq(h.encode(), """\
892,
893 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
894
895 def test_multiple_splittable_leading_char_followed_by_overlong_unsplitable(self):
896 eq = self.ndiffAssertEqual
897 h = Header(', , but_the_second'
898 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
899 '_all_by_itself')
900 eq(h.encode(), """\
901, ,
902 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
903
904 def test_trailing_splitable_on_overlong_unsplitable(self):
905 eq = self.ndiffAssertEqual
906 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
907 'be_on_a_line_all_by_itself;')
908 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_should_"
909 "be_on_a_line_all_by_itself;")
910
911 def test_trailing_splitable_on_overlong_unsplitable_with_leading_splitable(self):
912 eq = self.ndiffAssertEqual
913 h = Header('; '
914 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
R David Murray01581ee2011-04-18 10:04:34 -0400915 'be_on_a_line_all_by_itself; ')
R David Murray7da4db12011-04-07 20:37:17 -0400916 eq(h.encode(), """\
917;
R David Murray01581ee2011-04-18 10:04:34 -0400918 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
R David Murray7da4db12011-04-07 20:37:17 -0400919
    def test_long_header_with_multiple_sequential_split_chars(self):
        # Regression test: consecutive split characters (here, whitespace)
        # must not cause part of the header to be dropped when folding.
        # NOTE(review): the reviewed rendering collapses runs of spaces; the
        # doubled spaces below are restored from the sentence's own wording
        # ("two whitespaces in a row") -- confirm against the repository copy.
        eq = self.ndiffAssertEqual
        h = Header('This is a long line that has two whitespaces  in a row.  '
            'This used to cause truncation of the header when folded')
        eq(h.encode(), """\
This is a long line that has two whitespaces  in a row.  This used to cause
 truncation of the header when folded""")
927
R David Murray01581ee2011-04-18 10:04:34 -0400928 def test_splitter_split_on_punctuation_only_if_fws(self):
929 eq = self.ndiffAssertEqual
930 h = Header('thisverylongheaderhas;semicolons;and,commas,but'
931 'they;arenotlegal;fold,points')
932 eq(h.encode(), "thisverylongheaderhas;semicolons;and,commas,butthey;"
933 "arenotlegal;fold,points")
934
935 def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self):
936 eq = self.ndiffAssertEqual
937 h = Header('this is a test where we need to have more than one line '
938 'before; our final line that is just too big to fit;; '
939 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
940 'be_on_a_line_all_by_itself;')
941 eq(h.encode(), """\
942this is a test where we need to have more than one line before;
943 our final line that is just too big to fit;;
944 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""")
945
946 def test_overlong_last_part_followed_by_split_point(self):
947 eq = self.ndiffAssertEqual
948 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
949 'be_on_a_line_all_by_itself ')
950 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_"
951 "should_be_on_a_line_all_by_itself ")
952
953 def test_multiline_with_overlong_parts_separated_by_two_split_points(self):
954 eq = self.ndiffAssertEqual
955 h = Header('this_is_a__test_where_we_need_to_have_more_than_one_line_'
956 'before_our_final_line_; ; '
957 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
958 'be_on_a_line_all_by_itself; ')
959 eq(h.encode(), """\
960this_is_a__test_where_we_need_to_have_more_than_one_line_before_our_final_line_;
961 ;
962 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
963
964 def test_multiline_with_overlong_last_part_followed_by_split_point(self):
965 eq = self.ndiffAssertEqual
966 h = Header('this is a test where we need to have more than one line '
967 'before our final line; ; '
968 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
969 'be_on_a_line_all_by_itself; ')
970 eq(h.encode(), """\
971this is a test where we need to have more than one line before our final line;
972 ;
973 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
974
    def test_long_header_with_whitespace_runs(self):
        # Runs of whitespace between the message-ids must survive folding,
        # including the run left at the very end of the value.
        # NOTE(review): the reviewed rendering collapses runs of spaces. The
        # runs below are restored from the test's own logic (a trailing
        # \x20\x20 implies two spaces per element, hence three between ids)
        # -- confirm against the repository copy before applying.
        eq = self.ndiffAssertEqual
        msg = Message()
        msg['From'] = 'test@dom.ain'
        msg['References'] = SPACE.join(['<foo@dom.ain>  '] * 10)
        msg.set_payload('Test')
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), """\
From: test@dom.ain
References: <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>
   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>
   <foo@dom.ain>   <foo@dom.ain>\x20\x20

Test""")
991
992 def test_long_run_with_semi_header_splitter(self):
993 eq = self.ndiffAssertEqual
994 msg = Message()
995 msg['From'] = 'test@dom.ain'
996 msg['References'] = SPACE.join(['<foo@dom.ain>'] * 10) + '; abc'
997 msg.set_payload('Test')
998 sfp = StringIO()
999 g = Generator(sfp)
1000 g.flatten(msg)
1001 eq(sfp.getvalue(), """\
1002From: test@dom.ain
1003References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
1004 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
1005 <foo@dom.ain>; abc
1006
1007Test""")
1008
1009 def test_splitter_split_on_punctuation_only_if_fws(self):
1010 eq = self.ndiffAssertEqual
1011 msg = Message()
1012 msg['From'] = 'test@dom.ain'
1013 msg['References'] = ('thisverylongheaderhas;semicolons;and,commas,but'
1014 'they;arenotlegal;fold,points')
1015 msg.set_payload('Test')
1016 sfp = StringIO()
1017 g = Generator(sfp)
1018 g.flatten(msg)
1019 # XXX the space after the header should not be there.
1020 eq(sfp.getvalue(), """\
1021From: test@dom.ain
1022References:\x20
1023 thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points
1024
1025Test""")
1026
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001027 def test_no_split_long_header(self):
1028 eq = self.ndiffAssertEqual
1029 hstr = 'References: ' + 'x' * 80
Guido van Rossum9604e662007-08-30 03:46:43 +00001030 h = Header(hstr)
1031 # These come on two lines because Headers are really field value
1032 # classes and don't really know about their field names.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001033 eq(h.encode(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +00001034References:
1035 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
1036 h = Header('x' * 80)
1037 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001038
1039 def test_splitting_multiple_long_lines(self):
1040 eq = self.ndiffAssertEqual
1041 hstr = """\
1042from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1043\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1044\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1045"""
1046 h = Header(hstr, continuation_ws='\t')
1047 eq(h.encode(), """\
1048from babylon.socal-raves.org (localhost [127.0.0.1]);
1049 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1050 for <mailman-admin@babylon.socal-raves.org>;
1051 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1052\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1053 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1054 for <mailman-admin@babylon.socal-raves.org>;
1055 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1056\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1057 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1058 for <mailman-admin@babylon.socal-raves.org>;
1059 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")
1060
1061 def test_splitting_first_line_only_is_long(self):
1062 eq = self.ndiffAssertEqual
1063 hstr = """\
1064from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
1065\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1066\tid 17k4h5-00034i-00
1067\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
1068 h = Header(hstr, maxlinelen=78, header_name='Received',
1069 continuation_ws='\t')
1070 eq(h.encode(), """\
1071from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
1072 helo=cthulhu.gerg.ca)
1073\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1074\tid 17k4h5-00034i-00
1075\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")
1076
1077 def test_long_8bit_header(self):
1078 eq = self.ndiffAssertEqual
1079 msg = Message()
1080 h = Header('Britische Regierung gibt', 'iso-8859-1',
1081 header_name='Subject')
1082 h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
Guido van Rossum9604e662007-08-30 03:46:43 +00001083 eq(h.encode(maxlinelen=76), """\
1084=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1085 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001086 msg['Subject'] = h
Guido van Rossum9604e662007-08-30 03:46:43 +00001087 eq(msg.as_string(maxheaderlen=76), """\
1088Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1089 =?iso-8859-1?q?hore-Windkraftprojekte?=
1090
1091""")
1092 eq(msg.as_string(maxheaderlen=0), """\
1093Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001094
1095""")
1096
1097 def test_long_8bit_header_no_charset(self):
1098 eq = self.ndiffAssertEqual
1099 msg = Message()
Barry Warsaw8c571042007-08-30 19:17:18 +00001100 header_string = ('Britische Regierung gibt gr\xfcnes Licht '
1101 'f\xfcr Offshore-Windkraftprojekte '
1102 '<a-very-long-address@example.com>')
1103 msg['Reply-To'] = header_string
R David Murray7441a7a2012-03-14 02:59:51 -04001104 eq(msg.as_string(maxheaderlen=78), """\
1105Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1106 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
1107
1108""")
Barry Warsaw8c571042007-08-30 19:17:18 +00001109 msg = Message()
R David Murray7441a7a2012-03-14 02:59:51 -04001110 msg['Reply-To'] = Header(header_string,
Barry Warsaw8c571042007-08-30 19:17:18 +00001111 header_name='Reply-To')
1112 eq(msg.as_string(maxheaderlen=78), """\
1113Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1114 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001115
1116""")
1117
1118 def test_long_to_header(self):
1119 eq = self.ndiffAssertEqual
1120 to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
R David Murray01581ee2011-04-18 10:04:34 -04001121 '<someone@eecs.umich.edu>, '
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001122 '"Someone Test #B" <someone@umich.edu>, '
1123 '"Someone Test #C" <someone@eecs.umich.edu>, '
1124 '"Someone Test #D" <someone@eecs.umich.edu>')
1125 msg = Message()
1126 msg['To'] = to
1127 eq(msg.as_string(maxheaderlen=78), '''\
Guido van Rossum9604e662007-08-30 03:46:43 +00001128To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
Barry Warsaw70d61ce2009-03-30 23:12:30 +00001129 "Someone Test #B" <someone@umich.edu>,
Guido van Rossum9604e662007-08-30 03:46:43 +00001130 "Someone Test #C" <someone@eecs.umich.edu>,
1131 "Someone Test #D" <someone@eecs.umich.edu>
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001132
1133''')
1134
1135 def test_long_line_after_append(self):
1136 eq = self.ndiffAssertEqual
1137 s = 'This is an example of string which has almost the limit of header length.'
1138 h = Header(s)
1139 h.append('Add another line.')
Guido van Rossum9604e662007-08-30 03:46:43 +00001140 eq(h.encode(maxlinelen=76), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001141This is an example of string which has almost the limit of header length.
1142 Add another line.""")
1143
1144 def test_shorter_line_with_append(self):
1145 eq = self.ndiffAssertEqual
1146 s = 'This is a shorter line.'
1147 h = Header(s)
1148 h.append('Add another sentence. (Surprise?)')
1149 eq(h.encode(),
1150 'This is a shorter line. Add another sentence. (Surprise?)')
1151
1152 def test_long_field_name(self):
1153 eq = self.ndiffAssertEqual
1154 fn = 'X-Very-Very-Very-Long-Header-Name'
Guido van Rossum9604e662007-08-30 03:46:43 +00001155 gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
1156 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
1157 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
1158 'bef\xf6rdert. ')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001159 h = Header(gs, 'iso-8859-1', header_name=fn)
1160 # BAW: this seems broken because the first line is too long
Guido van Rossum9604e662007-08-30 03:46:43 +00001161 eq(h.encode(maxlinelen=76), """\
1162=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
1163 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
1164 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
1165 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001166
1167 def test_long_received_header(self):
1168 h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
1169 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
1170 'Wed, 05 Mar 2003 18:10:18 -0700')
1171 msg = Message()
1172 msg['Received-1'] = Header(h, continuation_ws='\t')
1173 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +00001174 # This should be splitting on spaces not semicolons.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001175 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001176Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1177 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
Barry Warsawbef9d212007-08-31 10:55:37 +00001178 Wed, 05 Mar 2003 18:10:18 -0700
R David Murray01581ee2011-04-18 10:04:34 -04001179Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1180 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
Barry Warsawbef9d212007-08-31 10:55:37 +00001181 Wed, 05 Mar 2003 18:10:18 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001182
1183""")
1184
1185 def test_string_headerinst_eq(self):
1186 h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
1187 'tu-muenchen.de> (David Bremner\'s message of '
1188 '"Thu, 6 Mar 2003 13:58:21 +0100")')
1189 msg = Message()
1190 msg['Received-1'] = Header(h, header_name='Received-1',
1191 continuation_ws='\t')
1192 msg['Received-2'] = h
R David Murray01581ee2011-04-18 10:04:34 -04001193 # XXX The space after the ':' should not be there.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001194 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001195Received-1:\x20
1196 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1197 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
1198Received-2:\x20
1199 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1200 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001201
1202""")
1203
    def test_long_unbreakable_lines_with_continuation(self):
        # The same two-line unbreakable value is set three ways: as a plain
        # string, as a Header, and as a string with a leading space. All
        # three currently serialise to the same folded form.
        eq = self.ndiffAssertEqual
        msg = Message()
        t = """\
iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
        msg['Face-1'] = t
        msg['Face-2'] = Header(t, header_name='Face-2')
        msg['Face-3'] = ' ' + t
        # XXX This splitting is all wrong.  Either the first value line
        # should be snug against the field name, or the space after the
        # header should not be there.
        eq(msg.as_string(maxheaderlen=78), """\
Face-1:\x20
 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Face-2:\x20
 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Face-3:\x20
 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp

""")
1227
1228 def test_another_long_multiline_header(self):
1229 eq = self.ndiffAssertEqual
1230 m = ('Received: from siimage.com '
1231 '([172.25.1.3]) by zima.siliconimage.com with '
Guido van Rossum9604e662007-08-30 03:46:43 +00001232 'Microsoft SMTPSVC(5.0.2195.4905); '
1233 'Wed, 16 Oct 2002 07:41:11 -0700')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001234 msg = email.message_from_string(m)
1235 eq(msg.as_string(maxheaderlen=78), '''\
R David Murray01581ee2011-04-18 10:04:34 -04001236Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with
1237 Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 2002 07:41:11 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001238
1239''')
1240
1241 def test_long_lines_with_different_header(self):
1242 eq = self.ndiffAssertEqual
1243 h = ('List-Unsubscribe: '
1244 '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
1245 ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
1246 '?subject=unsubscribe>')
1247 msg = Message()
1248 msg['List'] = h
1249 msg['List'] = Header(h, header_name='List')
1250 eq(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001251List: List-Unsubscribe:
1252 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +00001253 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
R David Murray01581ee2011-04-18 10:04:34 -04001254List: List-Unsubscribe:
1255 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +00001256 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001257
1258""")
1259
    def test_long_rfc2047_header_with_embedded_fws(self):
        # A value that already contains folding white space is encoded with
        # a non-ASCII charset: each embedded line starts a new encoded word
        # on its own continuation line.
        # NOTE(review): the reviewed rendering collapses indentation inside
        # these dedent() literals.  The extra leading whitespace (three
        # spaces on the last input line, one space on each expected
        # continuation line) is restored here -- confirm against the
        # repository copy before applying.
        h = Header(textwrap.dedent("""\
            We're going to pretend this header is in a non-ascii character set
            \tto see if line wrapping with encoded words and embedded
               folding white space works"""),
                   charset='utf-8',
                   header_name='Test')
        self.assertEqual(h.encode()+'\n', textwrap.dedent("""\
            =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
             =?utf-8?q?cter_set?=
             =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
             =?utf-8?q?_folding_white_space_works?=""")+'\n')
1272
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001273
Ezio Melottib3aedd42010-11-20 19:04:17 +00001274
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001275# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    # Checks Generator's mangle_from_ option: body lines that begin with
    # "From " are written as ">From " only when the option is on.

    def setUp(self):
        self.msg = Message()
        self.msg['From'] = 'aaa@bbb.org'
        self.msg.set_payload("""\
From the desk of A.A.A.:
Blah blah blah
""")

    def _flatten(self, mangle):
        # Serialise self.msg with the requested mangle_from_ setting.
        out = StringIO()
        Generator(out, mangle_from_=mangle).flatten(self.msg)
        return out.getvalue()

    def test_mangled_from(self):
        self.assertEqual(self._flatten(True), """\
From: aaa@bbb.org

>From the desk of A.A.A.:
Blah blah blah
""")

    def test_dont_mangle_from(self):
        self.assertEqual(self._flatten(False), """\
From: aaa@bbb.org

From the desk of A.A.A.:
Blah blah blah
""")

    def test_mangle_from_in_preamble_and_epilog(self):
        # "From " lines in a multipart's preamble and epilogue are mangled
        # too: exactly two '>From ' lines are expected.
        out = StringIO()
        gen = Generator(out, mangle_from_=True)
        msg = email.message_from_string(textwrap.dedent("""\
            From: foo@bar.com
            Mime-Version: 1.0
            Content-Type: multipart/mixed; boundary=XXX

            From somewhere unknown

            --XXX
            Content-Type: text/plain

            foo

            --XXX--

            From somewhere unknowable
            """))
        gen.flatten(msg)
        mangled = sum(line.startswith('>From ')
                      for line in out.getvalue().split('\n'))
        self.assertEqual(mangled, 2)

    def test_mangled_from_with_bad_bytes(self):
        # Mangling must also work on the bytes path when the line carries
        # non-ASCII data.
        source = textwrap.dedent("""\
            Content-Type: text/plain; charset="utf-8"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 8bit
            From: aaa@bbb.org

            """).encode('utf-8')
        msg = email.message_from_bytes(source + b'From R\xc3\xb6lli\n')
        out = BytesIO()
        BytesGenerator(out, mangle_from_=True).flatten(msg)
        self.assertEqual(out.getvalue(), source + b'>From R\xc3\xb6lli\n')
1343
Ezio Melottib3aedd42010-11-20 19:04:17 +00001344
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001345# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    # Exercises MIMEAudio built from the audiotest.au fixture.

    def setUp(self):
        with openfile('audiotest.au', 'rb') as fp:
            self._audiodata = fp.read()
        self._au = MIMEAudio(self._audiodata)

    def test_guess_minor_type(self):
        self.assertEqual(self._au.get_content_type(), 'audio/basic')

    def test_encoding(self):
        # The payload is base64 text that decodes back to the fixture bytes.
        encoded = bytes(self._au.get_payload(), 'ascii')
        self.assertEqual(base64.decodebytes(encoded), self._audiodata)

    def test_checkSetMinor(self):
        au = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(au.get_content_type(), 'audio/fish')

    def test_add_header(self):
        au = self._au
        au.add_header('Content-Disposition', 'attachment',
                      filename='audiotest.au')
        self.assertEqual(au['content-disposition'],
                         'attachment; filename="audiotest.au"')
        self.assertEqual(au.get_params(header='content-disposition'),
                         [('attachment', ''), ('filename', 'audiotest.au')])
        self.assertEqual(
            au.get_param('filename', header='content-disposition'),
            'audiotest.au')
        # A fresh list serves as a sentinel that can only match by identity.
        missing = []
        self.assertEqual(
            au.get_param('attachment', header='content-disposition'), '')
        self.assertIs(au.get_param('foo', failobj=missing,
                                   header='content-disposition'), missing)
        # Try some missing stuff
        self.assertIs(au.get_param('foobar', missing), missing)
        self.assertIs(au.get_param('attachment', missing,
                                   header='foobar'), missing)
1383
1384
Ezio Melottib3aedd42010-11-20 19:04:17 +00001385
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001386# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    # Exercises MIMEImage built from the PyBanner048.gif fixture.

    def setUp(self):
        with openfile('PyBanner048.gif', 'rb') as fp:
            self._imgdata = fp.read()
        self._im = MIMEImage(self._imgdata)

    def test_guess_minor_type(self):
        self.assertEqual(self._im.get_content_type(), 'image/gif')

    def test_encoding(self):
        # The payload is base64 text that decodes back to the fixture bytes.
        encoded = bytes(self._im.get_payload(), 'ascii')
        self.assertEqual(base64.decodebytes(encoded), self._imgdata)

    def test_checkSetMinor(self):
        im = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(im.get_content_type(), 'image/fish')

    def test_add_header(self):
        im = self._im
        im.add_header('Content-Disposition', 'attachment',
                      filename='dingusfish.gif')
        self.assertEqual(im['content-disposition'],
                         'attachment; filename="dingusfish.gif"')
        self.assertEqual(im.get_params(header='content-disposition'),
                         [('attachment', ''), ('filename', 'dingusfish.gif')])
        self.assertEqual(
            im.get_param('filename', header='content-disposition'),
            'dingusfish.gif')
        # A fresh list serves as a sentinel that can only match by identity.
        missing = []
        self.assertEqual(
            im.get_param('attachment', header='content-disposition'), '')
        self.assertIs(im.get_param('foo', failobj=missing,
                                   header='content-disposition'), missing)
        # Try some missing stuff
        self.assertIs(im.get_param('foobar', missing), missing)
        self.assertIs(im.get_param('attachment', missing,
                                   header='foobar'), missing)
1424
1425
Ezio Melottib3aedd42010-11-20 19:04:17 +00001426
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001427# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    """Checks for MIMEApplication with the default and explicit encoders."""

    # Six bytes that are not valid ASCII; shared by most tests below.
    _BINARY = b'\xfa\xfb\xfc\xfd\xfe\xff'

    def _roundtrip(self, msg):
        """Flatten msg with BytesGenerator and parse the bytes back."""
        s = BytesIO()
        g = BytesGenerator(s)
        g.flatten(msg)
        wireform = s.getvalue()
        return email.message_from_bytes(wireform)

    def test_headers(self):
        eq = self.assertEqual
        msg = MIMEApplication(self._BINARY)
        eq(msg.get_content_type(), 'application/octet-stream')
        eq(msg['content-transfer-encoding'], 'base64')

    def test_body(self):
        eq = self.assertEqual
        bytesdata = self._BINARY
        msg = MIMEApplication(bytesdata)
        # whitespace in the cte encoded block is RFC-irrelevant.
        eq(msg.get_payload().strip(), '+vv8/f7/')
        eq(msg.get_payload(decode=True), bytesdata)

    def test_binary_body_with_encode_7or8bit(self):
        # Issue 17171.
        bytesdata = self._BINARY
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_7or8bit)
        # Treated as a string, this will be invalid code points.
        self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        self.assertEqual(msg['Content-Transfer-Encoding'], '8bit')
        # The reparsed message must look the same as the one we built.
        msg2 = self._roundtrip(msg)
        self.assertEqual(msg2.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
        self.assertEqual(msg2['Content-Transfer-Encoding'], '8bit')

    def test_binary_body_with_encode_noop(self):
        # Issue 16564: This does not produce an RFC valid message, since to be
        # valid it should have a CTE of binary.  But the below works in
        # Python2, and is documented as working this way.
        bytesdata = self._BINARY
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_noop)
        # Treated as a string, this will be invalid code points.
        self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        msg2 = self._roundtrip(msg)
        self.assertEqual(msg2.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)

    def test_binary_body_with_encode_quopri(self):
        # Issue 14360.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff '
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_quopri)
        self.assertEqual(msg.get_payload(), '=FA=FB=FC=FD=FE=FF=20')
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        self.assertEqual(msg['Content-Transfer-Encoding'], 'quoted-printable')
        msg2 = self._roundtrip(msg)
        self.assertEqual(msg2.get_payload(), '=FA=FB=FC=FD=FE=FF=20')
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
        self.assertEqual(msg2['Content-Transfer-Encoding'], 'quoted-printable')

    def test_binary_body_with_encode_base64(self):
        bytesdata = self._BINARY
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_base64)
        self.assertEqual(msg.get_payload(), '+vv8/f7/\n')
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        msg2 = self._roundtrip(msg)
        self.assertEqual(msg2.get_payload(), '+vv8/f7/\n')
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
1505
Ezio Melottib3aedd42010-11-20 19:04:17 +00001506
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001507# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    """Checks for MIMEText content type, charset and payload handling."""

    def setUp(self):
        self._msg = MIMEText('hello there')

    def test_types(self):
        eq = self.assertEqual
        eq(self._msg.get_content_type(), 'text/plain')
        eq(self._msg.get_param('charset'), 'us-ascii')
        # A private object used as failobj; assertIs proves that this very
        # object is what get_param hands back for an absent param/header.
        missing = []
        self.assertIs(self._msg.get_param('foobar', missing), missing)
        self.assertIs(self._msg.get_param('charset', missing, header='foobar'),
                      missing)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        # NOTE(review): currently makes the same assertions as test_charset.
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there')
        eq(msg.get_charset(), 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        self.assertIn('hello there', msg.as_string())

    def test_utf8_input(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        eq = self.assertEqual
        msg = MIMEText(teststr, _charset='utf-8')
        eq(msg.get_charset().output_charset, 'utf-8')
        eq(msg['content-type'], 'text/plain; charset="utf-8"')
        eq(msg.get_payload(decode=True), teststr.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
                   "will fix in email6")
    def test_utf8_input_no_charset(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
1558
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001559
Ezio Melottib3aedd42010-11-20 19:04:17 +00001560
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001561# Test complicated multipart/* messages
class TestMultipart(TestEmailBase):
    """Generation and parsing of multipart/* messages, including odd ones."""

    def setUp(self):
        # Build a two-part multipart/mixed message (text intro + gif image)
        # and keep references to the container and both parts.
        with openfile('PyBanner048.gif', 'rb') as fp:
            data = fp.read()
        container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
        image = MIMEImage(data, name='dingusfish.gif')
        image.add_header('content-disposition', 'attachment',
                         filename='dingusfish.gif')
        intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
        container.attach(intro)
        container.attach(image)
        container['From'] = 'Barry <barry@digicool.com>'
        container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
        container['Subject'] = 'Here is your dingus fish'

        now = 987809702.54848599
        timetuple = time.localtime(now)
        # tm_isdst selects which of the two offsets applies to `now`.
        if timetuple.tm_isdst == 0:
            tzsecs = time.timezone
        else:
            tzsecs = time.altzone
        # time.timezone/altzone are seconds *west* of UTC, so a positive
        # value is a negative zone.  The sign is emitted separately, hence
        # the magnitude must be formatted from abs() and split into hours
        # and minutes (dividing by 36 is wrong for e.g. a +0530 zone and
        # leaked a second sign for zones east of UTC).
        sign = '-' if tzsecs > 0 else '+'
        hours, minutes = divmod(abs(tzsecs) // 60, 60)
        tzoffset = ' %s%02d%02d' % (sign, hours, minutes)
        container['Date'] = time.strftime(
            '%a, %d %b %Y %H:%M:%S', timetuple) + tzoffset
        self._msg = container
        self._im = image
        self._txt = intro

    def _new_outer(self):
        """Return an empty multipart/mixed MIMEBase with the standard
        Subject/To/From headers used by the generation tests."""
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        return outer

    def test_hierarchy(self):
        # convenience
        eq = self.assertEqual
        raises = self.assertRaises
        # tests
        m = self._msg
        self.assertTrue(m.is_multipart())
        eq(m.get_content_type(), 'multipart/mixed')
        eq(len(m.get_payload()), 2)
        raises(IndexError, m.get_payload, 2)
        m0 = m.get_payload(0)
        m1 = m.get_payload(1)
        # The payload holds the very objects attached in setUp, in order.
        self.assertIs(m0, self._txt)
        self.assertIs(m1, self._im)
        eq(m.get_payload(), [m0, m1])
        self.assertFalse(m0.is_multipart())
        self.assertFalse(m1.is_multipart())

    def test_empty_multipart_idempotent(self):
        text = """\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY


--BOUNDARY--
"""
        msg = Parser().parsestr(text)
        self.ndiffAssertEqual(text, msg.as_string())

    def test_no_parts_in_a_multipart_with_none_epilogue(self):
        outer = self._new_outer()
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY

--BOUNDARY--''')

    def test_no_parts_in_a_multipart_with_empty_epilogue(self):
        outer = self._new_outer()
        outer.preamble = ''
        outer.epilogue = ''
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY

--BOUNDARY--
''')

    def test_one_part_in_a_multipart(self):
        eq = self.ndiffAssertEqual
        outer = self._new_outer()
        outer.set_boundary('BOUNDARY')
        msg = MIMEText('hello world')
        outer.attach(msg)
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_preamble(self):
        eq = self.ndiffAssertEqual
        outer = self._new_outer()
        outer.preamble = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_preamble(self):
        eq = self.ndiffAssertEqual
        outer = self._new_outer()
        outer.preamble = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = self._new_outer()
        outer.epilogue = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = self._new_outer()
        outer.epilogue = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = self._new_outer()
        outer.epilogue = '\n'
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--

''')

    def test_message_external_body(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_36.txt')
        eq(len(msg.get_payload()), 2)
        msg1 = msg.get_payload(1)
        eq(msg1.get_content_type(), 'multipart/alternative')
        eq(len(msg1.get_payload()), 2)
        for subpart in msg1.get_payload():
            eq(subpart.get_content_type(), 'message/external-body')
            eq(len(subpart.get_payload()), 1)
            subsubpart = subpart.get_payload(0)
            eq(subsubpart.get_content_type(), 'text/plain')

    def test_double_boundary(self):
        # msg_37.txt is a multipart that contains two dash-boundary's in a
        # row.  Our interpretation of RFC 2046 calls for ignoring the second
        # and subsequent boundaries.
        msg = self._msgobj('msg_37.txt')
        self.assertEqual(len(msg.get_payload()), 3)

    def test_nested_inner_contains_outer_boundary(self):
        eq = self.ndiffAssertEqual
        # msg_38.txt has an inner part that contains outer boundaries.  My
        # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
        # these are illegal and should be interpreted as unterminated inner
        # parts.
        msg = self._msgobj('msg_38.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # _structure indents each nesting level by four spaces.
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
            text/plain
        text/plain
    text/plain
    text/plain
""")

    def test_nested_with_same_boundary(self):
        eq = self.ndiffAssertEqual
        # msg 39.txt is similarly evil in that it's got inner parts that use
        # the same boundary as outer parts.  Again, I believe the way this is
        # parsed is closest to the spirit of RFC 2046
        msg = self._msgobj('msg_39.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # _structure indents each nesting level by four spaces.
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
        application/octet-stream
        application/octet-stream
    text/plain
""")

    def test_boundary_in_non_multipart(self):
        msg = self._msgobj('msg_40.txt')
        self.assertEqual(msg.as_string(), '''\
MIME-Version: 1.0
Content-Type: text/html; boundary="--961284236552522269"

----961284236552522269
Content-Type: text/html;
Content-Transfer-Encoding: 7Bit

<html></html>

----961284236552522269--
''')

    def test_boundary_with_leading_space(self):
        eq = self.assertEqual
        msg = email.message_from_string('''\
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary=" XXXX"

-- XXXX
Content-Type: text/plain


-- XXXX
Content-Type: text/plain

-- XXXX--
''')
        self.assertTrue(msg.is_multipart())
        eq(msg.get_boundary(), ' XXXX')
        eq(len(msg.get_payload()), 2)

    def test_boundary_without_trailing_newline(self):
        m = Parser().parsestr("""\
Content-Type: multipart/mixed; boundary="===============0012394164=="
MIME-Version: 1.0

--===============0012394164==
Content-Type: image/file1.jpg
MIME-Version: 1.0
Content-Transfer-Encoding: base64

YXNkZg==
--===============0012394164==--""")
        self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001933
1934
Ezio Melottib3aedd42010-11-20 19:04:17 +00001935
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001936# Test some badly formatted messages
class TestNonConformant(TestEmailBase):
    """Parsing of badly formatted messages and the defects they record.

    Tests marked "test_defect_handling" carry that marker comment from the
    original file.
    """

    def test_parse_missing_minor_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_14.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')

    # test_defect_handling
    def test_same_boundary_inner_outer(self):
        msg = self._msgobj('msg_15.txt')
        # XXX We can probably eventually do better
        inner = msg.get_payload(0)
        self.assertTrue(hasattr(inner, 'defects'))
        self.assertEqual(len(inner.defects), 1)
        self.assertIsInstance(inner.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    # test_defect_handling
    def test_multipart_no_boundary(self):
        msg = self._msgobj('msg_25.txt')
        # The payload is a plain string here, not a list of parts.
        self.assertIsInstance(msg.get_payload(), str)
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    # Template for the CTE tests below: the {} placeholder sits at the end of
    # the (folded) Content-Type header so a test can splice in an extra
    # header line.  The boundary= line must stay indented deeper than the
    # other lines so that, after dedent, it is still a header continuation.
    multipart_msg = textwrap.dedent("""\
        Date: Wed, 14 Nov 2007 12:56:23 GMT
        From: foo@bar.invalid
        To: foo@bar.invalid
        Subject: Content-Transfer-Encoding: base64 and multipart
        MIME-Version: 1.0
        Content-Type: multipart/mixed;
            boundary="===============3344438784458119861=="{}

        --===============3344438784458119861==
        Content-Type: text/plain

        Test message

        --===============3344438784458119861==
        Content-Type: application/octet-stream
        Content-Transfer-Encoding: base64

        YWJj

        --===============3344438784458119861==--
        """)

    # test_defect_handling
    def test_multipart_invalid_cte(self):
        msg = self._str_msg(
            self.multipart_msg.format("\nContent-Transfer-Encoding: base64"))
        self.assertEqual(len(msg.defects), 1)
        self.assertIsInstance(msg.defects[0],
            errors.InvalidMultipartContentTransferEncodingDefect)

    # test_defect_handling
    def test_multipart_no_cte_no_defect(self):
        msg = self._str_msg(self.multipart_msg.format(''))
        self.assertEqual(len(msg.defects), 0)

    # test_defect_handling
    def test_multipart_valid_cte_no_defect(self):
        for cte in ('7bit', '8bit', 'BINary'):
            msg = self._str_msg(
                self.multipart_msg.format(
                    "\nContent-Transfer-Encoding: {}".format(cte)))
            self.assertEqual(len(msg.defects), 0)

    # test_headerregistry.TestContentTypeHeader invalid_1 and invalid_2.
    def test_invalid_content_type(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        msg = Message()
        # RFC 2045, $5.2 says invalid yields text/plain
        msg['Content-Type'] = 'text'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Clear the old value and try something /really/ invalid
        del msg['content-type']
        msg['Content-Type'] = 'foo'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Still, make sure that the message is idempotently generated
        s = StringIO()
        g = Generator(s)
        g.flatten(msg)
        neq(s.getvalue(), 'Content-Type: foo\n\n')

    def test_no_start_boundary(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_31.txt')
        eq(msg.get_payload(), """\
--BOUNDARY
Content-Type: text/plain

message 1

--BOUNDARY
Content-Type: text/plain

message 2

--BOUNDARY--
""")

    def test_no_separating_blank_line(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_35.txt')
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: here's something interesting

counter to RFC 2822, there's no separating newline here
""")

    # test_defect_handling
    def test_lying_multipart(self):
        msg = self._msgobj('msg_41.txt')
        self.assertTrue(hasattr(msg, 'defects'))
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    # test_defect_handling
    def test_missing_start_boundary(self):
        outer = self._msgobj('msg_42.txt')
        # The message structure is:
        #
        # multipart/mixed
        #    text/plain
        #    message/rfc822
        #        multipart/mixed [*]
        #
        # [*] This message is missing its start boundary
        bad = outer.get_payload(1).get_payload(0)
        self.assertEqual(len(bad.defects), 1)
        self.assertIsInstance(bad.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    # test_defect_handling
    def test_first_line_is_continuation_header(self):
        eq = self.assertEqual
        m = ' Line 1\nSubject: test\n\nbody'
        msg = email.message_from_string(m)
        eq(msg.keys(), ['Subject'])
        eq(msg.get_payload(), 'body')
        eq(len(msg.defects), 1)
        self.assertDefectsEqual(msg.defects,
                                 [errors.FirstHeaderLineIsContinuationDefect])
        eq(msg.defects[0].line, ' Line 1\n')

    # test_defect_handling
    def test_missing_header_body_separator(self):
        # Our heuristic if we see a line that doesn't look like a header (no
        # leading whitespace but no ':') is to assume that the blank line that
        # separates the header from the body is missing, and to stop parsing
        # headers and start parsing the body.
        msg = self._str_msg('Subject: test\nnot a header\nTo: abc\n\nb\n')
        self.assertEqual(msg.keys(), ['Subject'])
        self.assertEqual(msg.get_payload(), 'not a header\nTo: abc\n\nb\n')
        self.assertDefectsEqual(msg.defects,
                                [errors.MissingHeaderBodySeparatorDefect])
2111 [errors.MissingHeaderBodySeparatorDefect])
2112
Ezio Melottib3aedd42010-11-20 19:04:17 +00002113
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002114# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    """RFC 2047 encoded-word decoding (decode_header) and re-encoding
    (make_header), including the worked examples from the end of the RFC."""

    def test_rfc2047_multiline(self):
        eq = self.assertEqual
        s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
        dh = decode_header(s)
        eq(dh, [
            (b'Re: ', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            (b' baz foo bar ', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
        header = make_header(dh)
        eq(str(header),
           'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""")

    def test_whitespace_keeper_unicode(self):
        eq = self.assertEqual
        s = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        dh = decode_header(s)
        eq(dh, [(b'Andr\xe9', 'iso-8859-1'),
                (b' Pirard <pirard@dom.ain>', None)])
        header = str(make_header(dh))
        eq(header, 'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_keeper_unicode_2(self):
        eq = self.assertEqual
        s = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?='
        dh = decode_header(s)
        eq(dh, [(b'The ', None), (b'quick brown fox', 'iso-8859-1'),
                (b' jumped over the ', None), (b'lazy dog', 'iso-8859-1')])
        hu = str(make_header(dh))
        eq(hu, 'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        dh = decode_header(s)
        self.assertEqual(dh, [(b'Sm', None), (b'\xf6', 'iso-8859-1'),
                              (b'rg', None), (b'\xe5', 'iso-8859-1'),
                              (b'sbord', None)])

    def test_rfc2047_with_whitespace(self):
        s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        dh = decode_header(s)
        self.assertEqual(dh, [(b'Sm ', None), (b'\xf6', 'iso-8859-1'),
                              (b' rg ', None), (b'\xe5', 'iso-8859-1'),
                              (b' sbord', None)])

    def test_rfc2047_B_bad_padding(self):
        s = '=?iso-8859-1?B?%s?='
        data = [                                # only test complete bytes
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi')
          ]
        for q, a in data:
            dh = decode_header(s % q)
            self.assertEqual(dh, [(a, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004.
        s = '=?iso-8659-1?Q?andr=e9=zz?='
        self.assertEqual(decode_header(s),
                         [(b'andr\xe9=zz', 'iso-8659-1')])

    def test_rfc2047_rfc2047_1(self):
        # 1st testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_2(self):
        # 2nd testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?= b)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a', 'iso-8859-1'), (b' b)', None)])

    def test_rfc2047_rfc2047_3(self):
        # 3rd testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_4(self):
        # 4th testcase at end of rfc2047: the encoded words are separated by
        # *two* spaces (that is what distinguishes it from the 3rd case).
        s = '(=?ISO-8859-1?Q?a?=  =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_5a(self):
        # 5th testcase at end of rfc2047 newline is \r\n
        s = '(=?ISO-8859-1?Q?a?=\r\n =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_5b(self):
        # 5th testcase at end of rfc2047 newline is \n
        s = '(=?ISO-8859-1?Q?a?=\n =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_6(self):
        # 6th testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a_b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a b', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_7(self):
        # 7th testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a', 'iso-8859-1'), (b' b', 'iso-8859-2'),
             (b')', None)])
        self.assertEqual(make_header(decode_header(s)).encode(), s.lower())
        self.assertEqual(str(make_header(decode_header(s))), '(a b)')

    def test_multiline_header(self):
        s = '=?windows-1252?q?=22M=FCller_T=22?=\r\n <T.Mueller@xxx.com>'
        self.assertEqual(decode_header(s),
            [(b'"M\xfcller T"', 'windows-1252'),
             (b'<T.Mueller@xxx.com>', None)])
        self.assertEqual(make_header(decode_header(s)).encode(),
                         ''.join(s.splitlines()))
        self.assertEqual(str(make_header(decode_header(s))),
                         '"Müller T" <T.Mueller@xxx.com>')
2240 '"Müller T" <T.Mueller@xxx.com>')
2241
Ezio Melottib3aedd42010-11-20 19:04:17 +00002242
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002243# Test the MIMEMessage class
class TestMIMEMessage(TestEmailBase):
    # Exercises MIMEMessage construction, parsing of message/* and
    # multipart containers, and flattening them back to text.

    def setUp(self):
        # Keep the raw text of the msg_11.txt fixture on the instance.
        with openfile('msg_11.txt') as fp:
            self._text = fp.read()

    def test_type_error(self):
        # A plain string is rejected with TypeError.
        self.assertRaises(TypeError, MIMEMessage, 'a plain string')

    def test_valid_argument(self):
        eq = self.assertEqual
        subject = 'A sub-message'
        m = Message()
        m['Subject'] = subject
        r = MIMEMessage(m)
        eq(r.get_content_type(), 'message/rfc822')
        # The payload is a one-element list holding the very object that
        # was passed to the constructor.
        payload = r.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subpart = payload[0]
        self.assertIs(subpart, m)
        eq(subpart['subject'], subject)

    def test_bad_multipart(self):
        # Attaching a second message to a MIMEMessage must fail.
        msg1 = Message()
        msg1['Subject'] = 'subpart 1'
        msg2 = Message()
        msg2['Subject'] = 'subpart 2'
        r = MIMEMessage(msg1)
        self.assertRaises(errors.MultipartConversionError, r.attach, msg2)

    def test_generate(self):
        # First craft the message to be encapsulated
        m = Message()
        m['Subject'] = 'An enclosed message'
        m.set_payload('Here is the body of the message.\n')
        r = MIMEMessage(m)
        r['Subject'] = 'The enclosing message'
        s = StringIO()
        g = Generator(s)
        g.flatten(r)
        self.assertEqual(s.getvalue(), """\
Content-Type: message/rfc822
MIME-Version: 1.0
Subject: The enclosing message

Subject: An enclosed message

Here is the body of the message.
""")

    def test_parse_message_rfc822(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_11.txt')
        eq(msg.get_content_type(), 'message/rfc822')
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        submsg = payload[0]
        self.assertIsInstance(submsg, Message)
        eq(submsg['subject'], 'An enclosed message')
        eq(submsg.get_payload(), 'Here is the body of the message.\n')

    def test_dsn(self):
        eq = self.assertEqual
        # msg 16 is a Delivery Status Notification, see RFC 1894
        msg = self._msgobj('msg_16.txt')
        eq(msg.get_content_type(), 'multipart/report')
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 3)
        # Subpart 1 is a text/plain, human readable section
        subpart = msg.get_payload(0)
        eq(subpart.get_content_type(), 'text/plain')
        # NOTE(review): the leading whitespace of the indented lines in
        # this literal must match msg_16.txt exactly -- confirm against
        # the fixture.
        eq(subpart.get_payload(), """\
This report relates to a message you sent with the following header fields:

 Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
 Date: Sun, 23 Sep 2001 20:10:55 -0700
 From: "Ian T. Henry" <henryi@oxy.edu>
 To: SoCal Raves <scr@socal-raves.org>
 Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

 Recipient address: jangel1@cougar.noc.ucla.edu
 Reason: recipient reached disk quota

""")
        # Subpart 2 contains the machine parsable DSN information.  It
        # consists of two blocks of headers, represented by two nested Message
        # objects.
        subpart = msg.get_payload(1)
        eq(subpart.get_content_type(), 'message/delivery-status')
        eq(len(subpart.get_payload()), 2)
        # message/delivery-status should treat each block as a bunch of
        # headers, i.e. a bunch of Message objects.
        dsn1 = subpart.get_payload(0)
        self.assertIsInstance(dsn1, Message)
        eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
        eq(dsn1.get_param('dns', header='reporting-mta'), '')
        # Try a missing one <wink>
        eq(dsn1.get_param('nsd', header='reporting-mta'), None)
        dsn2 = subpart.get_payload(1)
        self.assertIsInstance(dsn2, Message)
        eq(dsn2['action'], 'failed')
        eq(dsn2.get_params(header='original-recipient'),
           [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
        eq(dsn2.get_param('rfc822', header='final-recipient'), '')
        # Subpart 3 is the original message
        subpart = msg.get_payload(2)
        eq(subpart.get_content_type(), 'message/rfc822')
        payload = subpart.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subsubpart = payload[0]
        self.assertIsInstance(subsubpart, Message)
        eq(subsubpart.get_content_type(), 'text/plain')
        eq(subsubpart['message-id'],
           '<002001c144a6$8752e060$56104586@oxy.edu>')

    def test_epilogue(self):
        # A hand-built multipart with preamble and epilogue must flatten
        # to exactly the text stored in msg_21.txt.
        eq = self.ndiffAssertEqual
        with openfile('msg_21.txt') as fp:
            text = fp.read()
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = 'End of MIME message\n'
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), text)

    def test_no_nl_preamble(self):
        # The preamble has no trailing newline and the epilogue is empty.
        eq = self.ndiffAssertEqual
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = ''
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
""")

    def test_default_type(self):
        eq = self.assertEqual
        with openfile('msg_30.txt') as fp:
            msg = email.message_from_file(fp)
        container1 = msg.get_payload(0)
        eq(container1.get_default_type(), 'message/rfc822')
        eq(container1.get_content_type(), 'message/rfc822')
        container2 = msg.get_payload(1)
        eq(container2.get_default_type(), 'message/rfc822')
        eq(container2.get_content_type(), 'message/rfc822')
        container1a = container1.get_payload(0)
        eq(container1a.get_default_type(), 'text/plain')
        eq(container1a.get_content_type(), 'text/plain')
        container2a = container2.get_payload(0)
        eq(container2a.get_default_type(), 'text/plain')
        eq(container2a.get_content_type(), 'text/plain')

    def test_default_type_with_explicit_container_type(self):
        eq = self.assertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        container1 = msg.get_payload(0)
        eq(container1.get_default_type(), 'message/rfc822')
        eq(container1.get_content_type(), 'message/rfc822')
        container2 = msg.get_payload(1)
        eq(container2.get_default_type(), 'message/rfc822')
        eq(container2.get_content_type(), 'message/rfc822')
        container1a = container1.get_payload(0)
        eq(container1a.get_default_type(), 'text/plain')
        eq(container1a.get_content_type(), 'text/plain')
        container2a = container2.get_payload(0)
        eq(container2a.get_default_type(), 'text/plain')
        eq(container2a.get_content_type(), 'text/plain')

    def test_default_type_non_parsed(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # Set up container
        container = MIMEMultipart('digest', 'BOUNDARY')
        container.epilogue = ''
        # Set up subparts
        subpart1a = MIMEText('message 1\n')
        subpart2a = MIMEText('message 2\n')
        subpart1 = MIMEMessage(subpart1a)
        subpart2 = MIMEMessage(subpart2a)
        container.attach(subpart1)
        container.attach(subpart2)
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
        # With the explicit headers removed the reported types must not
        # change, and the flattened parts carry no headers of their own.
        del subpart1['content-type']
        del subpart1['mime-version']
        del subpart2['content-type']
        del subpart2['mime-version']
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')

    def test_mime_attachments_in_constructor(self):
        # Parts passed as _subparts become the payload, in order.
        eq = self.assertEqual
        text1 = MIMEText('')
        text2 = MIMEText('')
        msg = MIMEMultipart(_subparts=(text1, text2))
        eq(len(msg.get_payload()), 2)
        eq(msg.get_payload(0), text1)
        eq(msg.get_payload(1), text2)

    def test_default_multipart_constructor(self):
        # A MIMEMultipart built without arguments is still multipart.
        msg = MIMEMultipart()
        self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002542
Ezio Melottib3aedd42010-11-20 19:04:17 +00002543
# A general test of parser->model->generator idempotency.  In other words:
# read a message in, parse it into a message object tree, then, without
# touching the tree, regenerate the plain text.  The original text and the
# transformed text should be identical.  Note that we ignore the Unix-From
# since that may contain a changed date.
class TestIdempotent(TestEmailBase):
    # Each test parses a fixture file and checks that flattening the
    # resulting object tree reproduces the file's text.

    linesep = '\n'

    def _msgobj(self, filename):
        # Return both the parsed message and the raw text it came from.
        with openfile(filename) as fp:
            data = fp.read()
        msg = email.message_from_string(data)
        return msg, data

    def _idempotent(self, msg, text, unixfrom=False):
        # Flatten msg with header wrapping disabled (maxheaderlen=0) and
        # compare the output with the original text.
        eq = self.ndiffAssertEqual
        s = StringIO()
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg, unixfrom=unixfrom)
        eq(text, s.getvalue())

    def test_parse_text_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_01.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_params()[1], ('charset', 'us-ascii'))
        eq(msg.get_param('charset'), 'us-ascii')
        self.assertIsNone(msg.preamble)
        self.assertIsNone(msg.epilogue)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_03.txt')
        eq(msg.get_content_type(), 'text/plain')
        self.assertIsNone(msg.get_params())
        self.assertIsNone(msg.get_param('charset'))
        self._idempotent(msg, text)

    def test_simple_multipart(self):
        msg, text = self._msgobj('msg_04.txt')
        self._idempotent(msg, text)

    def test_MIME_digest(self):
        msg, text = self._msgobj('msg_02.txt')
        self._idempotent(msg, text)

    def test_long_header(self):
        msg, text = self._msgobj('msg_27.txt')
        self._idempotent(msg, text)

    def test_MIME_digest_with_part_headers(self):
        msg, text = self._msgobj('msg_28.txt')
        self._idempotent(msg, text)

    def test_mixed_with_image(self):
        msg, text = self._msgobj('msg_06.txt')
        self._idempotent(msg, text)

    def test_multipart_report(self):
        msg, text = self._msgobj('msg_05.txt')
        self._idempotent(msg, text)

    def test_dsn(self):
        msg, text = self._msgobj('msg_16.txt')
        self._idempotent(msg, text)

    def test_preamble_epilogue(self):
        msg, text = self._msgobj('msg_21.txt')
        self._idempotent(msg, text)

    def test_multipart_one_part(self):
        msg, text = self._msgobj('msg_23.txt')
        self._idempotent(msg, text)

    def test_multipart_no_parts(self):
        msg, text = self._msgobj('msg_24.txt')
        self._idempotent(msg, text)

    def test_no_start_boundary(self):
        msg, text = self._msgobj('msg_31.txt')
        self._idempotent(msg, text)

    def test_rfc2231_charset(self):
        msg, text = self._msgobj('msg_32.txt')
        self._idempotent(msg, text)

    def test_more_rfc2231_parameters(self):
        msg, text = self._msgobj('msg_33.txt')
        self._idempotent(msg, text)

    def test_text_plain_in_a_multipart_digest(self):
        msg, text = self._msgobj('msg_34.txt')
        self._idempotent(msg, text)

    def test_nested_multipart_mixeds(self):
        msg, text = self._msgobj('msg_12a.txt')
        self._idempotent(msg, text)

    def test_message_external_body_idempotent(self):
        msg, text = self._msgobj('msg_36.txt')
        self._idempotent(msg, text)

    def test_message_delivery_status(self):
        # This fixture is regenerated with the Unix-From line included.
        msg, text = self._msgobj('msg_43.txt')
        self._idempotent(msg, text, unixfrom=True)

    def test_message_signed_idempotent(self):
        msg, text = self._msgobj('msg_45.txt')
        self._idempotent(msg, text)

    def test_content_type(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_05.txt')
        eq(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = {}
        for pk, pv in msg.get_params():
            params[pk] = pv
        eq(params['report-type'], 'delivery-status')
        eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep)
        eq(msg.epilogue, self.linesep)
        eq(len(msg.get_payload()), 3)
        # Make sure the subparts are what we expect
        msg1 = msg.get_payload(0)
        eq(msg1.get_content_type(), 'text/plain')
        eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg2 = msg.get_payload(1)
        eq(msg2.get_content_type(), 'text/plain')
        eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg3 = msg.get_payload(2)
        eq(msg3.get_content_type(), 'message/rfc822')
        self.assertIsInstance(msg3, Message)
        payload = msg3.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg4 = payload[0]
        self.assertIsInstance(msg4, Message)
        eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep)

    def test_parser(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        eq(msg.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and that
        # that submessage has a type of text/plain
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg1 = payload[0]
        self.assertIsInstance(msg1, Message)
        eq(msg1.get_content_type(), 'text/plain')
        self.assertIsInstance(msg1.get_payload(), str)
        eq(msg1.get_payload(), self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002706
2707
Ezio Melottib3aedd42010-11-20 19:04:17 +00002708
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002709# Test various other bits of the package's functionality
2710class TestMiscellaneous(TestEmailBase):
def test_message_from_string(self):
    # Parse a fixture from a string, flatten it again and expect the
    # identical text back.
    with openfile('msg_01.txt') as fp:
        original = fp.read()
    parsed = email.message_from_string(original)
    out = StringIO()
    # maxheaderlen=0: long headers must not be wrapped/continued, since
    # this is an idempotency check.
    Generator(out, maxheaderlen=0).flatten(parsed)
    self.assertEqual(original, out.getvalue())
2721
def test_message_from_file(self):
    # Parse a fixture from an open file, flatten it again and expect the
    # identical text back.
    with openfile('msg_01.txt') as fp:
        original = fp.read()
        # Rewind so the parser sees the file from the beginning.
        fp.seek(0)
        parsed = email.message_from_file(fp)
    out = StringIO()
    # maxheaderlen=0: long headers must not be wrapped/continued, since
    # this is an idempotency check.
    Generator(out, maxheaderlen=0).flatten(parsed)
    self.assertEqual(original, out.getvalue())
2733
def test_message_from_string_with_class(self):
    # message_from_string() must build the tree out of the Message
    # subclass it is given, for the root and for every subpart.
    with openfile('msg_01.txt') as fp:
        text = fp.read()

    # Create a subclass
    class MyMessage(Message):
        pass

    msg = email.message_from_string(text, MyMessage)
    self.assertIsInstance(msg, MyMessage)
    # Try something more complicated
    with openfile('msg_02.txt') as fp:
        text = fp.read()
    msg = email.message_from_string(text, MyMessage)
    for subpart in msg.walk():
        self.assertIsInstance(subpart, MyMessage)
2751
def test_message_from_file_with_class(self):
    # message_from_file() must build the tree out of the Message
    # subclass it is given, for the root and for every subpart.

    # Create a subclass
    class MyMessage(Message):
        pass

    with openfile('msg_01.txt') as fp:
        msg = email.message_from_file(fp, MyMessage)
    self.assertIsInstance(msg, MyMessage)
    # Try something more complicated
    with openfile('msg_02.txt') as fp:
        msg = email.message_from_file(fp, MyMessage)
    for subpart in msg.walk():
        self.assertIsInstance(subpart, MyMessage)
2766
def test_custom_message_does_not_require_arguments(self):
    # A Message subclass whose __init__ takes no arguments must still be
    # usable as the message class.
    class MyMessage(Message):
        def __init__(self):
            super().__init__()
    msg = self._str_msg("Subject: test\n\ntest", MyMessage)
    self.assertIsInstance(msg, MyMessage)
2773
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002774 def test__all__(self):
2775 module = __import__('email')
R David Murray1b6c7242012-03-16 22:43:05 -04002776 self.assertEqual(sorted(module.__all__), [
2777 'base64mime', 'charset', 'encoders', 'errors', 'feedparser',
2778 'generator', 'header', 'iterators', 'message',
2779 'message_from_binary_file', 'message_from_bytes',
2780 'message_from_file', 'message_from_string', 'mime', 'parser',
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002781 'quoprimime', 'utils',
2782 ])
2783
2784 def test_formatdate(self):
2785 now = time.time()
2786 self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
2787 time.gmtime(now)[:6])
2788
2789 def test_formatdate_localtime(self):
2790 now = time.time()
2791 self.assertEqual(
2792 utils.parsedate(utils.formatdate(now, localtime=True))[:6],
2793 time.localtime(now)[:6])
2794
2795 def test_formatdate_usegmt(self):
2796 now = time.time()
2797 self.assertEqual(
2798 utils.formatdate(now, localtime=False),
2799 time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
2800 self.assertEqual(
2801 utils.formatdate(now, localtime=False, usegmt=True),
2802 time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
2803
Georg Brandl1aca31e2012-09-22 09:03:56 +02002804 # parsedate and parsedate_tz will become deprecated interfaces someday
2805 def test_parsedate_returns_None_for_invalid_strings(self):
2806 self.assertIsNone(utils.parsedate(''))
2807 self.assertIsNone(utils.parsedate_tz(''))
2808 self.assertIsNone(utils.parsedate('0'))
2809 self.assertIsNone(utils.parsedate_tz('0'))
2810 self.assertIsNone(utils.parsedate('A Complete Waste of Time'))
2811 self.assertIsNone(utils.parsedate_tz('A Complete Waste of Time'))
2812 # Not a part of the spec but, but this has historically worked:
2813 self.assertIsNone(utils.parsedate(None))
2814 self.assertIsNone(utils.parsedate_tz(None))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002815
2816 def test_parsedate_compact(self):
2817 # The FWS after the comma is optional
2818 self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
2819 utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
2820
2821 def test_parsedate_no_dayofweek(self):
2822 eq = self.assertEqual
2823 eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
2824 (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
2825
2826 def test_parsedate_compact_no_dayofweek(self):
2827 eq = self.assertEqual
2828 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
2829 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2830
R. David Murray4a62e892010-12-23 20:35:46 +00002831 def test_parsedate_no_space_before_positive_offset(self):
2832 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
2833 (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
2834
2835 def test_parsedate_no_space_before_negative_offset(self):
2836 # Issue 1155362: we already handled '+' for this case.
2837 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
2838 (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))
2839
2840
R David Murrayaccd1c02011-03-13 20:06:23 -04002841 def test_parsedate_accepts_time_with_dots(self):
2842 eq = self.assertEqual
2843 eq(utils.parsedate_tz('5 Feb 2003 13.47.26 -0800'),
2844 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2845 eq(utils.parsedate_tz('5 Feb 2003 13.47 -0800'),
2846 (2003, 2, 5, 13, 47, 0, 0, 1, -1, -28800))
2847
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002848 def test_parsedate_acceptable_to_time_functions(self):
2849 eq = self.assertEqual
2850 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
2851 t = int(time.mktime(timetup))
2852 eq(time.localtime(t)[:6], timetup[:6])
2853 eq(int(time.strftime('%Y', timetup)), 2003)
2854 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
2855 t = int(time.mktime(timetup[:9]))
2856 eq(time.localtime(t)[:6], timetup[:6])
2857 eq(int(time.strftime('%Y', timetup[:9])), 2003)
2858
Alexander Belopolskya07548e2012-06-21 20:34:09 -04002859 def test_mktime_tz(self):
2860 self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
2861 -1, -1, -1, 0)), 0)
2862 self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
2863 -1, -1, -1, 1234)), -1234)
2864
R. David Murray219d1c82010-08-25 00:45:55 +00002865 def test_parsedate_y2k(self):
2866 """Test for parsing a date with a two-digit year.
2867
2868 Parsing a date with a two-digit year should return the correct
2869 four-digit year. RFC822 allows two-digit years, but RFC2822 (which
2870 obsoletes RFC822) requires four-digit years.
2871
2872 """
2873 self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
2874 utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
2875 self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
2876 utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
2877
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002878 def test_parseaddr_empty(self):
2879 self.assertEqual(utils.parseaddr('<>'), ('', ''))
2880 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
2881
2882 def test_noquote_dump(self):
2883 self.assertEqual(
2884 utils.formataddr(('A Silly Person', 'person@dom.ain')),
2885 'A Silly Person <person@dom.ain>')
2886
2887 def test_escape_dump(self):
2888 self.assertEqual(
2889 utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
R David Murrayb53319f2012-03-14 15:31:47 -04002890 r'"A (Very) Silly Person" <person@dom.ain>')
2891 self.assertEqual(
2892 utils.parseaddr(r'"A \(Very\) Silly Person" <person@dom.ain>'),
2893 ('A (Very) Silly Person', 'person@dom.ain'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002894 a = r'A \(Special\) Person'
2895 b = 'person@dom.ain'
2896 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2897
2898 def test_escape_backslashes(self):
2899 self.assertEqual(
2900 utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
2901 r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
2902 a = r'Arthur \Backslash\ Foobar'
2903 b = 'person@dom.ain'
2904 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2905
R David Murray8debacb2011-04-06 09:35:57 -04002906 def test_quotes_unicode_names(self):
2907 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2908 name = "H\u00e4ns W\u00fcrst"
2909 addr = 'person@dom.ain'
2910 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
2911 latin1_quopri = "=?iso-8859-1?q?H=E4ns_W=FCrst?= <person@dom.ain>"
2912 self.assertEqual(utils.formataddr((name, addr)), utf8_base64)
2913 self.assertEqual(utils.formataddr((name, addr), 'iso-8859-1'),
2914 latin1_quopri)
2915
2916 def test_accepts_any_charset_like_object(self):
2917 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2918 name = "H\u00e4ns W\u00fcrst"
2919 addr = 'person@dom.ain'
2920 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
2921 foobar = "FOOBAR"
2922 class CharsetMock:
2923 def header_encode(self, string):
2924 return foobar
2925 mock = CharsetMock()
2926 mock_expected = "%s <%s>" % (foobar, addr)
2927 self.assertEqual(utils.formataddr((name, addr), mock), mock_expected)
2928 self.assertEqual(utils.formataddr((name, addr), Charset('utf-8')),
2929 utf8_base64)
2930
2931 def test_invalid_charset_like_object_raises_error(self):
2932 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2933 name = "H\u00e4ns W\u00fcrst"
2934 addr = 'person@dom.ain'
2935 # A object without a header_encode method:
2936 bad_charset = object()
2937 self.assertRaises(AttributeError, utils.formataddr, (name, addr),
2938 bad_charset)
2939
2940 def test_unicode_address_raises_error(self):
2941 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2942 addr = 'pers\u00f6n@dom.in'
2943 self.assertRaises(UnicodeError, utils.formataddr, (None, addr))
2944 self.assertRaises(UnicodeError, utils.formataddr, ("Name", addr))
2945
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002946 def test_name_with_dot(self):
2947 x = 'John X. Doe <jxd@example.com>'
2948 y = '"John X. Doe" <jxd@example.com>'
2949 a, b = ('John X. Doe', 'jxd@example.com')
2950 self.assertEqual(utils.parseaddr(x), (a, b))
2951 self.assertEqual(utils.parseaddr(y), (a, b))
2952 # formataddr() quotes the name if there's a dot in it
2953 self.assertEqual(utils.formataddr((a, b)), y)
2954
R. David Murray5397e862010-10-02 15:58:26 +00002955 def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
2956 # issue 10005. Note that in the third test the second pair of
2957 # backslashes is not actually a quoted pair because it is not inside a
2958 # comment or quoted string: the address being parsed has a quoted
2959 # string containing a quoted backslash, followed by 'example' and two
2960 # backslashes, followed by another quoted string containing a space and
2961 # the word 'example'. parseaddr copies those two backslashes
2962 # literally. Per rfc5322 this is not technically correct since a \ may
2963 # not appear in an address outside of a quoted string. It is probably
2964 # a sensible Postel interpretation, though.
2965 eq = self.assertEqual
2966 eq(utils.parseaddr('""example" example"@example.com'),
2967 ('', '""example" example"@example.com'))
2968 eq(utils.parseaddr('"\\"example\\" example"@example.com'),
2969 ('', '"\\"example\\" example"@example.com'))
2970 eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
2971 ('', '"\\\\"example\\\\" example"@example.com'))
2972
R. David Murray63563cd2010-12-18 18:25:38 +00002973 def test_parseaddr_preserves_spaces_in_local_part(self):
2974 # issue 9286. A normal RFC5322 local part should not contain any
2975 # folding white space, but legacy local parts can (they are a sequence
2976 # of atoms, not dotatoms). On the other hand we strip whitespace from
2977 # before the @ and around dots, on the assumption that the whitespace
2978 # around the punctuation is a mistake in what would otherwise be
2979 # an RFC5322 local part. Leading whitespace is, usual, stripped as well.
2980 self.assertEqual(('', "merwok wok@xample.com"),
2981 utils.parseaddr("merwok wok@xample.com"))
2982 self.assertEqual(('', "merwok wok@xample.com"),
2983 utils.parseaddr("merwok wok@xample.com"))
2984 self.assertEqual(('', "merwok wok@xample.com"),
2985 utils.parseaddr(" merwok wok @xample.com"))
2986 self.assertEqual(('', 'merwok"wok" wok@xample.com'),
2987 utils.parseaddr('merwok"wok" wok@xample.com'))
2988 self.assertEqual(('', 'merwok.wok.wok@xample.com'),
2989 utils.parseaddr('merwok. wok . wok@xample.com'))
2990
R David Murrayb53319f2012-03-14 15:31:47 -04002991 def test_formataddr_does_not_quote_parens_in_quoted_string(self):
2992 addr = ("'foo@example.com' (foo@example.com)",
2993 'foo@example.com')
2994 addrstr = ('"\'foo@example.com\' '
2995 '(foo@example.com)" <foo@example.com>')
2996 self.assertEqual(utils.parseaddr(addrstr), addr)
2997 self.assertEqual(utils.formataddr(addr), addrstr)
2998
2999
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003000 def test_multiline_from_comment(self):
3001 x = """\
3002Foo
3003\tBar <foo@example.com>"""
3004 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
3005
3006 def test_quote_dump(self):
3007 self.assertEqual(
3008 utils.formataddr(('A Silly; Person', 'person@dom.ain')),
3009 r'"A Silly; Person" <person@dom.ain>')
3010
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003011 def test_charset_richcomparisons(self):
3012 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003013 ne = self.assertNotEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003014 cset1 = Charset()
3015 cset2 = Charset()
3016 eq(cset1, 'us-ascii')
3017 eq(cset1, 'US-ASCII')
3018 eq(cset1, 'Us-AsCiI')
3019 eq('us-ascii', cset1)
3020 eq('US-ASCII', cset1)
3021 eq('Us-AsCiI', cset1)
3022 ne(cset1, 'usascii')
3023 ne(cset1, 'USASCII')
3024 ne(cset1, 'UsAsCiI')
3025 ne('usascii', cset1)
3026 ne('USASCII', cset1)
3027 ne('UsAsCiI', cset1)
3028 eq(cset1, cset2)
3029 eq(cset2, cset1)
3030
3031 def test_getaddresses(self):
3032 eq = self.assertEqual
3033 eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
3034 'Bud Person <bperson@dom.ain>']),
3035 [('Al Person', 'aperson@dom.ain'),
3036 ('Bud Person', 'bperson@dom.ain')])
3037
3038 def test_getaddresses_nasty(self):
3039 eq = self.assertEqual
3040 eq(utils.getaddresses(['foo: ;']), [('', '')])
3041 eq(utils.getaddresses(
3042 ['[]*-- =~$']),
3043 [('', ''), ('', ''), ('', '*--')])
3044 eq(utils.getaddresses(
3045 ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
3046 [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
3047
3048 def test_getaddresses_embedded_comment(self):
3049 """Test proper handling of a nested comment"""
3050 eq = self.assertEqual
3051 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
3052 eq(addrs[0][1], 'foo@bar.com')
3053
3054 def test_utils_quote_unquote(self):
3055 eq = self.assertEqual
3056 msg = Message()
3057 msg.add_header('content-disposition', 'attachment',
3058 filename='foo\\wacky"name')
3059 eq(msg.get_filename(), 'foo\\wacky"name')
3060
3061 def test_get_body_encoding_with_bogus_charset(self):
3062 charset = Charset('not a charset')
3063 self.assertEqual(charset.get_body_encoding(), 'base64')
3064
3065 def test_get_body_encoding_with_uppercase_charset(self):
3066 eq = self.assertEqual
3067 msg = Message()
3068 msg['Content-Type'] = 'text/plain; charset=UTF-8'
3069 eq(msg['content-type'], 'text/plain; charset=UTF-8')
3070 charsets = msg.get_charsets()
3071 eq(len(charsets), 1)
3072 eq(charsets[0], 'utf-8')
3073 charset = Charset(charsets[0])
3074 eq(charset.get_body_encoding(), 'base64')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00003075 msg.set_payload(b'hello world', charset=charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003076 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
3077 eq(msg.get_payload(decode=True), b'hello world')
3078 eq(msg['content-transfer-encoding'], 'base64')
3079 # Try another one
3080 msg = Message()
3081 msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
3082 charsets = msg.get_charsets()
3083 eq(len(charsets), 1)
3084 eq(charsets[0], 'us-ascii')
3085 charset = Charset(charsets[0])
3086 eq(charset.get_body_encoding(), encoders.encode_7or8bit)
3087 msg.set_payload('hello world', charset=charset)
3088 eq(msg.get_payload(), 'hello world')
3089 eq(msg['content-transfer-encoding'], '7bit')
3090
3091 def test_charsets_case_insensitive(self):
3092 lc = Charset('us-ascii')
3093 uc = Charset('US-ASCII')
3094 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
3095
def test_partial_falls_inside_message_delivery_status(self):
    # The Parser interface provides chunks of data to FeedParser in 8192
    # byte gulps.  SF bug #1076485 found one of those chunks inside
    # message/delivery-status header block, which triggered an
    # unreadline() of NeedMoreData.
    msg = self._msgobj('msg_43.txt')
    outline = StringIO()
    iterators._structure(msg, outline)
    # Expected outline: a multipart/report holding a text part, a
    # delivery-status part with 26 text/plain children, and the
    # returned headers.
    expected = (
        "multipart/report\n"
        "    text/plain\n"
        "    message/delivery-status\n"
        + "        text/plain\n" * 26
        + "    text/rfc822-headers\n"
    )
    self.ndiffAssertEqual(outline.getvalue(), expected)
3137
R. David Murraya0b44b52010-12-02 21:47:19 +00003138 def test_make_msgid_domain(self):
3139 self.assertEqual(
3140 email.utils.make_msgid(domain='testdomain-string')[-19:],
3141 '@testdomain-string>')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003142
def test_Generator_linend(self):
    # Issue 14645.
    with openfile('msg_26.txt', newline='\n') as source:
        raw_text = source.read()
    # Flattening with the default line separator should give the input
    # with every CRLF turned into a bare LF.
    expected = raw_text.replace('\r\n', '\n')
    parsed = email.message_from_string(raw_text)
    sink = StringIO()
    email.generator.Generator(sink).flatten(parsed)
    self.assertEqual(sink.getvalue(), expected)
3153
def test_BytesGenerator_linend(self):
    # Issue 14645.
    with openfile('msg_26.txt', newline='\n') as source:
        crlf_text = source.read()
    # Parse an LF-only copy, then flatten with CRLF and expect the
    # original text back.
    lf_text = crlf_text.replace('\r\n', '\n')
    parsed = email.message_from_string(lf_text)
    sink = BytesIO()
    email.generator.BytesGenerator(sink).flatten(parsed, linesep='\r\n')
    self.assertEqual(sink.getvalue().decode('ascii'), crlf_text)
3164
def test_BytesGenerator_linend_with_non_ascii(self):
    # Issue 14645.
    with openfile('msg_26.txt', 'rb') as source:
        original = source.read()
    # Plant a non-ASCII byte in the message before the round trip.
    crlf_bytes = original.replace(b'with attachment', b'fo\xf6')
    lf_bytes = crlf_bytes.replace(b'\r\n', b'\n')
    parsed = email.message_from_bytes(lf_bytes)
    sink = BytesIO()
    email.generator.BytesGenerator(sink).flatten(parsed, linesep='\r\n')
    self.assertEqual(sink.getvalue(), crlf_bytes)
3176
Ezio Melottib3aedd42010-11-20 19:04:17 +00003177
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003178# Test the iterator/generators
class TestIterators(TestEmailBase):
    """Tests for the ``email.iterators`` helpers and feedparser buffering."""

    def test_body_line_iterator(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # First a simple non-multipart message
        msg = self._msgobj('msg_01.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 6)
        neq(EMPTYSTRING.join(lines), msg.get_payload())
        # Now a more complicated multipart
        msg = self._msgobj('msg_02.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 43)
        with openfile('msg_19.txt') as fp:
            neq(EMPTYSTRING.join(lines), fp.read())

    def test_typed_subpart_iterator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_04.txt')
        # Collect the payload of every subpart whose main type is 'text'.
        payloads = [subpart.get_payload() for subpart in
                    iterators.typed_subpart_iterator(msg, 'text')]
        eq(len(payloads), 2)
        eq(EMPTYSTRING.join(payloads), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_03.txt')
        # Same as above, but also filtering on the 'plain' subtype.
        payloads = [subpart.get_payload() for subpart in
                    iterators.typed_subpart_iterator(msg, 'text', 'plain')]
        eq(len(payloads), 1)
        eq(EMPTYSTRING.join(payloads), """\

Hi,

Do you like this message?

-Me
""")

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
           line endings.  A CR ending one push() followed by a LF starting
           the next push() added an empty line.
        '''
        from email.feedparser import BufferedSubFile, NeedMoreData
        # Each entry is (chunk to push, number of lines expected to become
        # readable immediately after that push).
        imt = [
            ("a\r \n", 2),
            ("b", 0),
            ("c\n", 1),
            ("", 0),
            ("d\r\n", 1),
            ("e\r", 0),
            ("\nf", 1),
            ("\r\n", 1),
        ]
        bsf = BufferedSubFile()
        om = []
        nt = 0
        for il, n in imt:
            bsf.push(il)
            nt += n
            n1 = 0
            # Drain every line that is available after this push; stop
            # when readline() returns the NeedMoreData marker.
            while True:
                ol = bsf.readline()
                if ol is NeedMoreData:
                    break
                om.append(ol)
                n1 += 1
            # assertEqual (rather than assertTrue(a == b)) reports both
            # values when the counts disagree.
            self.assertEqual(n, n1)
        self.assertEqual(len(om), nt)
        # Nothing may be lost or invented: the lines read back must
        # concatenate to exactly what was pushed.
        self.assertEqual(''.join(il for il, n in imt), ''.join(om))
3265
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003266
Ezio Melottib3aedd42010-11-20 19:04:17 +00003267
class TestParsers(TestEmailBase):
    """Tests for Parser, HeaderParser and the message_from_* helpers."""

    # Show complete diffs when a flattened message differs from its source.
    maxDiff = None

    def test_header_parser(self):
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt') as fp:
            msg = HeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        # The body is left unparsed, so it is a plain string payload.
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)

    def test_bytes_header_parser(self):
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt', 'rb') as fp:
            msg = email.parser.BytesHeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)
        self.assertIsInstance(msg.get_payload(decode=True), bytes)

    def test_whitespace_continuation(self):
        eq = self.assertEqual
        # This message contains a line after the Subject: header that has only
        # whitespace, but it is not empty!
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: the next line has a space on it
\x20
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        eq = self.assertEqual
        # Like the previous test, but the subject line is the last
        # header.
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam
Subject: the next line has a space on it
\x20

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        eq = self.assertEqual
        with openfile('msg_26.txt', newline='\n') as fp:
            msg = Parser().parse(fp)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'text/plain')
        eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'application/riscos')

    def test_crlf_flatten(self):
        # Using newline='\n' preserves the crlfs in this input file.
        with openfile('msg_26.txt', newline='\n') as fp:
            text = fp.read()
        msg = email.message_from_string(text)
        s = StringIO()
        Generator(s).flatten(msg, linesep='\r\n')
        self.assertEqual(s.getvalue(), text)

    def test_multipart_digest_with_extra_mime_headers(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'message/rfc822')
        self.assertTrue(part1.is_multipart())
        eq(len(part1.get_payload()), 1)
        part1a = part1.get_payload(0)
        self.assertFalse(part1a.is_multipart())
        eq(part1a.get_content_type(), 'text/plain')
        neq(part1a.get_payload(), 'message 1\n')
        # next message/rfc822
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'message/rfc822')
        self.assertTrue(part2.is_multipart())
        eq(len(part2.get_payload()), 1)
        part2a = part2.get_payload(0)
        self.assertFalse(part2a.is_multipart())
        eq(part2a.get_content_type(), 'text/plain')
        neq(part2a.get_payload(), 'message 2\n')

    def test_three_lines(self):
        # A bug report by Andrew McNamara
        lines = ['From: Andrew Person <aperson@dom.ain',
                 'Subject: Test',
                 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
        msg = email.message_from_string(NL.join(lines))
        self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        eq = self.assertEqual
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        value1 = 'text'
        value2 = 'more text'
        m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            value1, value2)
        msg = email.message_from_string(m)
        eq(msg.get('Header'), value1)
        eq(msg.get('Next-Header'), value2)

    def test_rfc2822_header_syntax(self):
        eq = self.assertEqual
        # Header names made of unusual printable characters are accepted.
        m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg), 3)
        eq(sorted(field for field in msg), ['!"#QUX;~', '>From', 'From'])
        eq(msg.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        eq = self.assertEqual
        # The first line has a space before any colon, so no headers are
        # expected to be recognised at all.
        m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg.keys()), 0)

    def test_rfc2822_one_character_header(self):
        eq = self.assertEqual
        m = 'A: first header\nB: second header\nCC: third header\n\nbody'
        msg = email.message_from_string(m)
        # sorted() works on any iterable of names, without relying on
        # keys() returning a mutable list.
        headers = sorted(msg.keys())
        eq(headers, ['A', 'B', 'CC'])
        eq(msg.get_payload(), 'body')

    def test_CRLFLF_at_end_of_part(self):
        # issue 5610: feedparser should not eat two chars from body part ending
        # with "\r\n\n".
        m = (
            "From: foo@bar.com\n"
            "To: baz\n"
            "Mime-Version: 1.0\n"
            "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
            "\n"
            "--BOUNDARY\n"
            "Content-Type: text/plain\n"
            "\n"
            "body ending with CRLF newline\r\n"
            "\n"
            "--BOUNDARY--\n"
          )
        msg = email.message_from_string(m)
        self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003443
Ezio Melottib3aedd42010-11-20 19:04:17 +00003444
R. David Murray96fd54e2010-10-08 15:55:28 +00003445class Test8BitBytesHandling(unittest.TestCase):
3446 # In Python3 all input is string, but that doesn't work if the actual input
3447 # uses an 8bit transfer encoding. To hack around that, in email 5.1 we
3448 # decode byte streams using the surrogateescape error handler, and
3449 # reconvert to binary at appropriate places if we detect surrogates. This
3450 # doesn't allow us to transform headers with 8bit bytes (they get munged),
3451 # but it does allow us to parse and preserve them, and to decode body
3452 # parts that use an 8bit CTE.
3453
3454 bodytest_msg = textwrap.dedent("""\
3455 From: foo@bar.com
3456 To: baz
3457 Mime-Version: 1.0
3458 Content-Type: text/plain; charset={charset}
3459 Content-Transfer-Encoding: {cte}
3460
3461 {bodyline}
3462 """)
3463
3464 def test_known_8bit_CTE(self):
3465 m = self.bodytest_msg.format(charset='utf-8',
3466 cte='8bit',
3467 bodyline='pöstal').encode('utf-8')
3468 msg = email.message_from_bytes(m)
3469 self.assertEqual(msg.get_payload(), "pöstal\n")
3470 self.assertEqual(msg.get_payload(decode=True),
3471 "pöstal\n".encode('utf-8'))
3472
3473 def test_unknown_8bit_CTE(self):
3474 m = self.bodytest_msg.format(charset='notavalidcharset',
3475 cte='8bit',
3476 bodyline='pöstal').encode('utf-8')
3477 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00003478 self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n")
R. David Murray96fd54e2010-10-08 15:55:28 +00003479 self.assertEqual(msg.get_payload(decode=True),
3480 "pöstal\n".encode('utf-8'))
3481
3482 def test_8bit_in_quopri_body(self):
3483 # This is non-RFC compliant data...without 'decode' the library code
3484 # decodes the body using the charset from the headers, and because the
3485 # source byte really is utf-8 this works. This is likely to fail
3486 # against real dirty data (ie: produce mojibake), but the data is
3487 # invalid anyway so it is as good a guess as any. But this means that
3488 # this test just confirms the current behavior; that behavior is not
3489 # necessarily the best possible behavior. With 'decode' it is
3490 # returning the raw bytes, so that test should be of correct behavior,
3491 # or at least produce the same result that email4 did.
3492 m = self.bodytest_msg.format(charset='utf-8',
3493 cte='quoted-printable',
3494 bodyline='p=C3=B6stál').encode('utf-8')
3495 msg = email.message_from_bytes(m)
3496 self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
3497 self.assertEqual(msg.get_payload(decode=True),
3498 'pöstál\n'.encode('utf-8'))
3499
3500 def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
3501 # This is similar to the previous test, but proves that if the 8bit
3502 # byte is undecodeable in the specified charset, it gets replaced
3503 # by the unicode 'unknown' character. Again, this may or may not
3504 # be the ideal behavior. Note that if decode=False none of the
3505 # decoders will get involved, so this is the only test we need
3506 # for this behavior.
3507 m = self.bodytest_msg.format(charset='ascii',
3508 cte='quoted-printable',
3509 bodyline='p=C3=B6stál').encode('utf-8')
3510 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00003511 self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
R. David Murray96fd54e2010-10-08 15:55:28 +00003512 self.assertEqual(msg.get_payload(decode=True),
3513 'pöstál\n'.encode('utf-8'))
3514
R David Murray80e0aee2012-05-27 21:23:34 -04003515 # test_defect_handling:test_invalid_chars_in_base64_payload
R. David Murray96fd54e2010-10-08 15:55:28 +00003516 def test_8bit_in_base64_body(self):
R David Murray80e0aee2012-05-27 21:23:34 -04003517 # If we get 8bit bytes in a base64 body, we can just ignore them
3518 # as being outside the base64 alphabet and decode anyway. But
3519 # we register a defect.
R. David Murray96fd54e2010-10-08 15:55:28 +00003520 m = self.bodytest_msg.format(charset='utf-8',
3521 cte='base64',
3522 bodyline='cMO2c3RhbAá=').encode('utf-8')
3523 msg = email.message_from_bytes(m)
3524 self.assertEqual(msg.get_payload(decode=True),
R David Murray80e0aee2012-05-27 21:23:34 -04003525 'pöstal'.encode('utf-8'))
3526 self.assertIsInstance(msg.defects[0],
3527 errors.InvalidBase64CharactersDefect)
R. David Murray96fd54e2010-10-08 15:55:28 +00003528
3529 def test_8bit_in_uuencode_body(self):
3530 # Sticking an 8bit byte in a uuencode block makes it undecodable by
3531 # normal means, so the block is returned undecoded, but as bytes.
3532 m = self.bodytest_msg.format(charset='utf-8',
3533 cte='uuencode',
3534 bodyline='<,.V<W1A; á ').encode('utf-8')
3535 msg = email.message_from_bytes(m)
3536 self.assertEqual(msg.get_payload(decode=True),
3537 '<,.V<W1A; á \n'.encode('utf-8'))
3538
3539
R. David Murray92532142011-01-07 23:25:30 +00003540 headertest_headers = (
3541 ('From: foo@bar.com', ('From', 'foo@bar.com')),
3542 ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
3543 ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
3544 '\tJean de Baddie',
3545 ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
3546 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
3547 ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
3548 ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
3549 )
3550 headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) +
3551 '\nYes, they are flying.\n').encode('utf-8')
R. David Murray96fd54e2010-10-08 15:55:28 +00003552
3553 def test_get_8bit_header(self):
3554 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003555 self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz')
3556 self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz')
R. David Murray96fd54e2010-10-08 15:55:28 +00003557
3558 def test_print_8bit_headers(self):
3559 msg = email.message_from_bytes(self.headertest_msg)
3560 self.assertEqual(str(msg),
R. David Murray92532142011-01-07 23:25:30 +00003561 textwrap.dedent("""\
3562 From: {}
3563 To: {}
3564 Subject: {}
3565 From: {}
3566
3567 Yes, they are flying.
3568 """).format(*[expected[1] for (_, expected) in
3569 self.headertest_headers]))
R. David Murray96fd54e2010-10-08 15:55:28 +00003570
3571 def test_values_with_8bit_headers(self):
3572 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003573 self.assertListEqual([str(x) for x in msg.values()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003574 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003575 'b\uFFFD\uFFFDz',
3576 'Maintenant je vous pr\uFFFD\uFFFDsente mon '
3577 'coll\uFFFD\uFFFDgue, le pouf '
3578 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
R. David Murray96fd54e2010-10-08 15:55:28 +00003579 '\tJean de Baddie',
R. David Murray92532142011-01-07 23:25:30 +00003580 "g\uFFFD\uFFFDst"])
R. David Murray96fd54e2010-10-08 15:55:28 +00003581
3582 def test_items_with_8bit_headers(self):
3583 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003584 self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003585 [('From', 'foo@bar.com'),
R. David Murray92532142011-01-07 23:25:30 +00003586 ('To', 'b\uFFFD\uFFFDz'),
3587 ('Subject', 'Maintenant je vous '
3588 'pr\uFFFD\uFFFDsente '
3589 'mon coll\uFFFD\uFFFDgue, le pouf '
3590 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
3591 '\tJean de Baddie'),
3592 ('From', 'g\uFFFD\uFFFDst')])
R. David Murray96fd54e2010-10-08 15:55:28 +00003593
3594 def test_get_all_with_8bit_headers(self):
3595 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003596 self.assertListEqual([str(x) for x in msg.get_all('from')],
R. David Murray96fd54e2010-10-08 15:55:28 +00003597 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003598 'g\uFFFD\uFFFDst'])
R. David Murray96fd54e2010-10-08 15:55:28 +00003599
R David Murraya2150232011-03-16 21:11:23 -04003600 def test_get_content_type_with_8bit(self):
3601 msg = email.message_from_bytes(textwrap.dedent("""\
3602 Content-Type: text/pl\xA7in; charset=utf-8
3603 """).encode('latin-1'))
3604 self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin")
3605 self.assertEqual(msg.get_content_maintype(), "text")
3606 self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin")
3607
R David Murray97f43c02012-06-24 05:03:27 -04003608 # test_headerregistry.TestContentTypeHeader.non_ascii_in_params
R David Murraya2150232011-03-16 21:11:23 -04003609 def test_get_params_with_8bit(self):
3610 msg = email.message_from_bytes(
3611 'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1'))
3612 self.assertEqual(msg.get_params(header='x-header'),
3613 [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])
3614 self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne')
3615 # XXX: someday you might be able to get 'b\xa7r', for now you can't.
3616 self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None)
3617
R David Murray97f43c02012-06-24 05:03:27 -04003618 # test_headerregistry.TestContentTypeHeader.non_ascii_in_rfc2231_value
R David Murraya2150232011-03-16 21:11:23 -04003619 def test_get_rfc2231_params_with_8bit(self):
3620 msg = email.message_from_bytes(textwrap.dedent("""\
3621 Content-Type: text/plain; charset=us-ascii;
3622 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3623 ).encode('latin-1'))
3624 self.assertEqual(msg.get_param('title'),
3625 ('us-ascii', 'en', 'This is not f\uFFFDn'))
3626
3627 def test_set_rfc2231_params_with_8bit(self):
3628 msg = email.message_from_bytes(textwrap.dedent("""\
3629 Content-Type: text/plain; charset=us-ascii;
3630 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3631 ).encode('latin-1'))
3632 msg.set_param('title', 'test')
3633 self.assertEqual(msg.get_param('title'), 'test')
3634
3635 def test_del_rfc2231_params_with_8bit(self):
3636 msg = email.message_from_bytes(textwrap.dedent("""\
3637 Content-Type: text/plain; charset=us-ascii;
3638 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3639 ).encode('latin-1'))
3640 msg.del_param('title')
3641 self.assertEqual(msg.get_param('title'), None)
3642 self.assertEqual(msg.get_content_maintype(), 'text')
3643
3644 def test_get_payload_with_8bit_cte_header(self):
3645 msg = email.message_from_bytes(textwrap.dedent("""\
3646 Content-Transfer-Encoding: b\xa7se64
3647 Content-Type: text/plain; charset=latin-1
3648
3649 payload
3650 """).encode('latin-1'))
3651 self.assertEqual(msg.get_payload(), 'payload\n')
3652 self.assertEqual(msg.get_payload(decode=True), b'payload\n')
3653
R. David Murray96fd54e2010-10-08 15:55:28 +00003654 non_latin_bin_msg = textwrap.dedent("""\
3655 From: foo@bar.com
3656 To: báz
3657 Subject: Maintenant je vous présente mon collègue, le pouf célèbre
3658 \tJean de Baddie
3659 Mime-Version: 1.0
3660 Content-Type: text/plain; charset="utf-8"
3661 Content-Transfer-Encoding: 8bit
3662
3663 Да, они летят.
3664 """).encode('utf-8')
3665
3666 def test_bytes_generator(self):
3667 msg = email.message_from_bytes(self.non_latin_bin_msg)
3668 out = BytesIO()
3669 email.generator.BytesGenerator(out).flatten(msg)
3670 self.assertEqual(out.getvalue(), self.non_latin_bin_msg)
3671
R. David Murray7372a072011-01-26 21:21:32 +00003672 def test_bytes_generator_handles_None_body(self):
3673 #Issue 11019
3674 msg = email.message.Message()
3675 out = BytesIO()
3676 email.generator.BytesGenerator(out).flatten(msg)
3677 self.assertEqual(out.getvalue(), b"\n")
3678
R. David Murray92532142011-01-07 23:25:30 +00003679 non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\
R. David Murray96fd54e2010-10-08 15:55:28 +00003680 From: foo@bar.com
R. David Murray92532142011-01-07 23:25:30 +00003681 To: =?unknown-8bit?q?b=C3=A1z?=
3682 Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=
3683 =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=
3684 =?unknown-8bit?q?_Jean_de_Baddie?=
R. David Murray96fd54e2010-10-08 15:55:28 +00003685 Mime-Version: 1.0
3686 Content-Type: text/plain; charset="utf-8"
3687 Content-Transfer-Encoding: base64
3688
3689 0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
3690 """)
3691
3692 def test_generator_handles_8bit(self):
3693 msg = email.message_from_bytes(self.non_latin_bin_msg)
3694 out = StringIO()
3695 email.generator.Generator(out).flatten(msg)
R. David Murray92532142011-01-07 23:25:30 +00003696 self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped)
R. David Murray96fd54e2010-10-08 15:55:28 +00003697
3698 def test_bytes_generator_with_unix_from(self):
3699 # The unixfrom contains a current date, so we can't check it
3700 # literally. Just make sure the first word is 'From' and the
3701 # rest of the message matches the input.
3702 msg = email.message_from_bytes(self.non_latin_bin_msg)
3703 out = BytesIO()
3704 email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
3705 lines = out.getvalue().split(b'\n')
3706 self.assertEqual(lines[0].split()[0], b'From')
3707 self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)
3708
R. David Murray92532142011-01-07 23:25:30 +00003709 non_latin_bin_msg_as7bit = non_latin_bin_msg_as7bit_wrapped.split('\n')
3710 non_latin_bin_msg_as7bit[2:4] = [
3711 'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
3712 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
3713 non_latin_bin_msg_as7bit = '\n'.join(non_latin_bin_msg_as7bit)
3714
R. David Murray96fd54e2010-10-08 15:55:28 +00003715 def test_message_from_binary_file(self):
3716 fn = 'test.msg'
3717 self.addCleanup(unlink, fn)
3718 with open(fn, 'wb') as testfile:
3719 testfile.write(self.non_latin_bin_msg)
Brett Cannon384917a2010-10-29 23:08:36 +00003720 with open(fn, 'rb') as testfile:
3721 m = email.parser.BytesParser().parse(testfile)
R. David Murray96fd54e2010-10-08 15:55:28 +00003722 self.assertEqual(str(m), self.non_latin_bin_msg_as7bit)
3723
3724 latin_bin_msg = textwrap.dedent("""\
3725 From: foo@bar.com
3726 To: Dinsdale
3727 Subject: Nudge nudge, wink, wink
3728 Mime-Version: 1.0
3729 Content-Type: text/plain; charset="latin-1"
3730 Content-Transfer-Encoding: 8bit
3731
3732 oh là là, know what I mean, know what I mean?
3733 """).encode('latin-1')
3734
3735 latin_bin_msg_as7bit = textwrap.dedent("""\
3736 From: foo@bar.com
3737 To: Dinsdale
3738 Subject: Nudge nudge, wink, wink
3739 Mime-Version: 1.0
3740 Content-Type: text/plain; charset="iso-8859-1"
3741 Content-Transfer-Encoding: quoted-printable
3742
3743 oh l=E0 l=E0, know what I mean, know what I mean?
3744 """)
3745
3746 def test_string_generator_reencodes_to_quopri_when_appropriate(self):
3747 m = email.message_from_bytes(self.latin_bin_msg)
3748 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3749
3750 def test_decoded_generator_emits_unicode_body(self):
3751 m = email.message_from_bytes(self.latin_bin_msg)
3752 out = StringIO()
3753 email.generator.DecodedGenerator(out).flatten(m)
3754 #DecodedHeader output contains an extra blank line compared
3755 #to the input message. RDM: not sure if this is a bug or not,
3756 #but it is not specific to the 8bit->7bit conversion.
3757 self.assertEqual(out.getvalue(),
3758 self.latin_bin_msg.decode('latin-1')+'\n')
3759
3760 def test_bytes_feedparser(self):
3761 bfp = email.feedparser.BytesFeedParser()
3762 for i in range(0, len(self.latin_bin_msg), 10):
3763 bfp.feed(self.latin_bin_msg[i:i+10])
3764 m = bfp.close()
3765 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3766
R. David Murray8451c4b2010-10-23 22:19:56 +00003767 def test_crlf_flatten(self):
3768 with openfile('msg_26.txt', 'rb') as fp:
3769 text = fp.read()
3770 msg = email.message_from_bytes(text)
3771 s = BytesIO()
3772 g = email.generator.BytesGenerator(s)
3773 g.flatten(msg, linesep='\r\n')
3774 self.assertEqual(s.getvalue(), text)
R David Murrayc5c14722011-04-06 08:13:02 -04003775
3776 def test_8bit_multipart(self):
3777 # Issue 11605
3778 source = textwrap.dedent("""\
3779 Date: Fri, 18 Mar 2011 17:15:43 +0100
3780 To: foo@example.com
3781 From: foodwatch-Newsletter <bar@example.com>
3782 Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System
3783 Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain>
3784 MIME-Version: 1.0
3785 Content-Type: multipart/alternative;
3786 boundary="b1_76a486bee62b0d200f33dc2ca08220ad"
3787
3788 --b1_76a486bee62b0d200f33dc2ca08220ad
3789 Content-Type: text/plain; charset="utf-8"
3790 Content-Transfer-Encoding: 8bit
3791
3792 Guten Tag, ,
3793
3794 mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die
3795 Nachrichten aus Japan.
3796
3797
3798 --b1_76a486bee62b0d200f33dc2ca08220ad
3799 Content-Type: text/html; charset="utf-8"
3800 Content-Transfer-Encoding: 8bit
3801
3802 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
3803 "http://www.w3.org/TR/html4/loose.dtd">
3804 <html lang="de">
3805 <head>
3806 <title>foodwatch - Newsletter</title>
3807 </head>
3808 <body>
3809 <p>mit gro&szlig;er Betroffenheit verfolgen auch wir im foodwatch-Team
3810 die Nachrichten aus Japan.</p>
3811 </body>
3812 </html>
3813 --b1_76a486bee62b0d200f33dc2ca08220ad--
3814
3815 """).encode('utf-8')
3816 msg = email.message_from_bytes(source)
3817 s = BytesIO()
3818 g = email.generator.BytesGenerator(s)
3819 g.flatten(msg)
3820 self.assertEqual(s.getvalue(), source)
3821
R David Murray9fd170e2012-03-14 14:05:03 -04003822 def test_bytes_generator_b_encoding_linesep(self):
3823 # Issue 14062: b encoding was tacking on an extra \n.
3824 m = Message()
3825 # This has enough non-ascii that it should always end up b encoded.
3826 m['Subject'] = Header('žluťoučký kůň')
3827 s = BytesIO()
3828 g = email.generator.BytesGenerator(s)
3829 g.flatten(m, linesep='\r\n')
3830 self.assertEqual(
3831 s.getvalue(),
3832 b'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
3833
3834 def test_generator_b_encoding_linesep(self):
3835 # Since this broke in ByteGenerator, test Generator for completeness.
3836 m = Message()
3837 # This has enough non-ascii that it should always end up b encoded.
3838 m['Subject'] = Header('žluťoučký kůň')
3839 s = StringIO()
3840 g = email.generator.Generator(s)
3841 g.flatten(m, linesep='\r\n')
3842 self.assertEqual(
3843 s.getvalue(),
3844 'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
3845
R. David Murray8451c4b2010-10-23 22:19:56 +00003846 maxDiff = None
3847
Ezio Melottib3aedd42010-11-20 19:04:17 +00003848
class BaseTestBytesGeneratorIdempotent:
    # Mixin holding the bytes flavour of the idempotency helpers.  Concrete
    # subclasses supply linesep, blinesep and normalize_linesep_regex.

    maxDiff = None

    def _msgobj(self, filename):
        # Read the fixture as bytes and rewrite its line endings to this
        # class's separator before parsing; return (message, normalized data).
        with openfile(filename, 'rb') as fp:
            raw = fp.read()
        normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(normalized), normalized

    def _idempotent(self, msg, data, unixfrom=False):
        # Flatten msg without header re-wrapping (maxheaderlen=0) and
        # require the output to equal the bytes it was parsed from.
        sink = BytesIO()
        generator = email.generator.BytesGenerator(sink, maxheaderlen=0)
        generator.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertEqual(data, sink.getvalue())
R. David Murray96fd54e2010-10-08 15:55:28 +00003865
3866
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                     TestIdempotent):
    # LF flavour: every CRLF found in fixture data is replaced by a bare LF,
    # and messages are flattened with '\n' as the line separator.
    normalize_linesep_regex = re.compile(br'\r\n')
    blinesep = b'\n'
    linesep = '\n'
3872
3873
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    # CRLF flavour: the lookbehind matches only LFs that are not already
    # preceded by CR, so existing CRLF pairs are left untouched.
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
    blinesep = b'\r\n'
    linesep = '\r\n'
3879
Ezio Melottib3aedd42010-11-20 19:04:17 +00003880
class TestBase64(unittest.TestCase):
    # Tests for the email.base64mime helper functions.

    def test_len(self):
        eq = self.assertEqual
        eq(base64mime.header_length('hello'),
           len(base64mime.body_encode(b'hello', eol='')))
        # Expected encoded length for input sizes 0..14: four output
        # characters for every started group of three input characters.
        expected_lengths = ([0] + [4] * 3 + [8] * 3 + [12] * 3 + [16] * 3
                            + [20] * 2)
        for size, bsize in enumerate(expected_lengths):
            eq(base64mime.header_length('x' * size), bsize)

    def test_decode(self):
        eq = self.assertEqual
        eq(base64mime.decode(''), b'')
        eq(base64mime.decode('aGVsbG8='), b'hello')

    def test_encode(self):
        eq = self.assertEqual
        eq(base64mime.body_encode(b''), b'')
        eq(base64mime.body_encode(b'hello'), 'aGVsbG8=\n')
        # A newline in the input is encoded like any other byte
        eq(base64mime.body_encode(b'hello\n'), 'aGVsbG8K\n')
        # Test the maxlinelen arg
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40), """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IA==
""")
        # Test the eol argument
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IA==\r
""")

    def test_header_encode(self):
        eq = self.assertEqual
        he = base64mime.header_encode
        eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        # Line breaks inside the header text are encoded, not passed through
        eq(he('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # Test the charset option
        eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003931
3932
Ezio Melottib3aedd42010-11-20 19:04:17 +00003933
class TestQuopri(unittest.TestCase):
    # Tests for the email.quoprimime helper functions.

    def setUp(self):
        # Set of characters (as byte integers) that don't need to be encoded
        # in headers.
        self.hlit = list(chain(
            range(ord('a'), ord('z') + 1),
            range(ord('A'), ord('Z') + 1),
            range(ord('0'), ord('9') + 1),
            b'!*+-/'))
        # Set of characters (as byte integers) that do need to be encoded in
        # headers.
        self.hnon = [c for c in range(256) if c not in self.hlit]
        # Sanity check on the fixture itself; a real assertion so that it is
        # not stripped when running under -O.
        self.assertEqual(len(self.hlit) + len(self.hnon), 256)
        # Set of characters (as byte integers) that don't need to be encoded
        # in bodies.
        self.blit = list(range(ord(' '), ord('~') + 1))
        self.blit.append(ord('\t'))
        self.blit.remove(ord('='))
        # Set of characters (as byte integers) that do need to be encoded in
        # bodies.
        self.bnon = [c for c in range(256) if c not in self.blit]
        self.assertEqual(len(self.blit) + len(self.bnon), 256)

    def test_quopri_header_check(self):
        for c in self.hlit:
            self.assertFalse(quoprimime.header_check(c),
                        'Should not be header quopri encoded: %s' % chr(c))
        for c in self.hnon:
            self.assertTrue(quoprimime.header_check(c),
                            'Should be header quopri encoded: %s' % chr(c))

    def test_quopri_body_check(self):
        for c in self.blit:
            self.assertFalse(quoprimime.body_check(c),
                        'Should not be body quopri encoded: %s' % chr(c))
        for c in self.bnon:
            self.assertTrue(quoprimime.body_check(c),
                            'Should be body quopri encoded: %s' % chr(c))

    def test_header_quopri_len(self):
        eq = self.assertEqual
        eq(quoprimime.header_length(b'hello'), 5)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
           quoprimime.header_length(b'hello') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
           quoprimime.header_length(b'h@e@l@l@o@') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        for c in self.hlit:
            eq(quoprimime.header_length(bytes([c])), 1,
               'expected length 1 for %r' % chr(c))
        for c in self.hnon:
            # Space is special; it's encoded to _
            if c == ord(' '):
                continue
            eq(quoprimime.header_length(bytes([c])), 3,
               'expected length 3 for %r' % chr(c))
        eq(quoprimime.header_length(b' '), 1)

    def test_body_quopri_len(self):
        eq = self.assertEqual
        for c in self.blit:
            eq(quoprimime.body_length(bytes([c])), 1)
        for c in self.bnon:
            eq(quoprimime.body_length(bytes([c])), 3)

    def test_quote_unquote_idempotent(self):
        for x in range(256):
            c = chr(x)
            self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)

    def _test_header_encode(self, header, expected_encoded_header, charset=None):
        # Encode header (with header_encode's default charset when charset
        # is None) and compare against the expected encoded word.
        if charset is None:
            encoded_header = quoprimime.header_encode(header)
        else:
            encoded_header = quoprimime.header_encode(header, charset)
        self.assertEqual(encoded_header, expected_encoded_header)

    def test_header_encode_null(self):
        self._test_header_encode(b'', '')

    def test_header_encode_one_word(self):
        self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=')

    def test_header_encode_two_lines(self):
        self._test_header_encode(b'hello\nworld',
                                '=?iso-8859-1?q?hello=0Aworld?=')

    def test_header_encode_non_ascii(self):
        self._test_header_encode(b'hello\xc7there',
                                '=?iso-8859-1?q?hello=C7there?=')

    def test_header_encode_alt_charset(self):
        self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
                charset='iso-8859-2')

    def _test_header_decode(self, encoded_header, expected_decoded_header):
        decoded_header = quoprimime.header_decode(encoded_header)
        self.assertEqual(decoded_header, expected_decoded_header)

    def test_header_decode_null(self):
        self._test_header_decode('', '')

    def test_header_decode_one_word(self):
        self._test_header_decode('hello', 'hello')

    def test_header_decode_two_lines(self):
        self._test_header_decode('hello=0Aworld', 'hello\nworld')

    def test_header_decode_non_ascii(self):
        self._test_header_decode('hello=C7there', 'hello\xc7there')

    def test_header_decode_re_bug_18380(self):
        # Issue 18380: Call re.sub with a positional argument for flags in the wrong position
        self.assertEqual(quoprimime.header_decode('=30' * 257), '0' * 257)

    def _test_decode(self, encoded, expected_decoded, eol=None):
        # Decode encoded (with decode's default eol when eol is None) and
        # compare against the expected text.
        if eol is None:
            decoded = quoprimime.decode(encoded)
        else:
            decoded = quoprimime.decode(encoded, eol=eol)
        self.assertEqual(decoded, expected_decoded)

    def test_decode_null_word(self):
        self._test_decode('', '')

    def test_decode_null_line_null_word(self):
        self._test_decode('\r\n', '\n')

    def test_decode_one_word(self):
        self._test_decode('hello', 'hello')

    def test_decode_one_word_eol(self):
        self._test_decode('hello', 'hello', eol='X')

    def test_decode_one_line(self):
        self._test_decode('hello\r\n', 'hello\n')

    def test_decode_one_line_lf(self):
        self._test_decode('hello\n', 'hello\n')

    def test_decode_one_line_cr(self):
        self._test_decode('hello\r', 'hello\n')

    def test_decode_one_line_nl(self):
        self._test_decode('hello\n', 'helloX', eol='X')

    def test_decode_one_line_crnl(self):
        self._test_decode('hello\r\n', 'helloX', eol='X')

    def test_decode_one_line_one_word(self):
        self._test_decode('hello\r\nworld', 'hello\nworld')

    def test_decode_one_line_one_word_eol(self):
        self._test_decode('hello\r\nworld', 'helloXworld', eol='X')

    def test_decode_two_lines(self):
        self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n')

    def test_decode_two_lines_eol(self):
        self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X')

    def test_decode_one_long_line(self):
        self._test_decode('Spam' * 250, 'Spam' * 250)

    def test_decode_one_space(self):
        self._test_decode(' ', '')

    def test_decode_multiple_spaces(self):
        self._test_decode(' ' * 5, '')

    def test_decode_one_line_trailing_spaces(self):
        self._test_decode('hello    \r\n', 'hello\n')

    def test_decode_two_lines_trailing_spaces(self):
        self._test_decode('hello    \r\nworld   \r\n', 'hello\nworld\n')

    def test_decode_quoted_word(self):
        self._test_decode('=22quoted=20words=22', '"quoted words"')

    def test_decode_uppercase_quoting(self):
        self._test_decode('ab=CD=EF', 'ab\xcd\xef')

    def test_decode_lowercase_quoting(self):
        self._test_decode('ab=cd=ef', 'ab\xcd\xef')

    def test_decode_soft_line_break(self):
        self._test_decode('soft line=\r\nbreak', 'soft linebreak')

    def test_decode_false_quoting(self):
        self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2')

    def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
        # Encode body, forwarding only the keyword arguments that were
        # given, compare with the expected text and, when the eol is one
        # splitlines() understands, check every line against maxlinelen.
        kwargs = {}
        if maxlinelen is None:
            # Use body_encode's default.
            maxlinelen = 76
        else:
            kwargs['maxlinelen'] = maxlinelen
        if eol is None:
            # Use body_encode's default.
            eol = '\n'
        else:
            kwargs['eol'] = eol
        encoded_body = quoprimime.body_encode(body, **kwargs)
        self.assertEqual(encoded_body, expected_encoded_body)
        if eol == '\n' or eol == '\r\n':
            # We know how to split the result back into lines, so maxlinelen
            # can be checked.
            for line in encoded_body.splitlines():
                self.assertLessEqual(len(line), maxlinelen)

    def test_encode_null(self):
        self._test_encode('', '')

    def test_encode_null_lines(self):
        self._test_encode('\n\n', '\n\n')

    def test_encode_one_line(self):
        self._test_encode('hello\n', 'hello\n')

    def test_encode_one_line_crlf(self):
        self._test_encode('hello\r\n', 'hello\n')

    def test_encode_one_line_eol(self):
        self._test_encode('hello\n', 'hello\r\n', eol='\r\n')

    def test_encode_one_space(self):
        self._test_encode(' ', '=20')

    def test_encode_one_line_one_space(self):
        self._test_encode(' \n', '=20\n')

    # XXX: body_encode() expects strings, but uses ord(char) from these
    # strings to index into a 256-entry list.  For code points above 255,
    # this will fail.  Should there be a check for 8-bit only ord() values in
    # body, or at least a comment about the expected input?

    def test_encode_two_lines_one_space(self):
        self._test_encode(' \n \n', '=20\n=20\n')

    def test_encode_one_word_trailing_spaces(self):
        # Only the final space of the line is quoted; the spaces before it
        # pass through unchanged.
        self._test_encode('hello    ', 'hello   =20')

    def test_encode_one_line_trailing_spaces(self):
        # Only the final space of the line is quoted; the spaces before it
        # pass through unchanged.
        self._test_encode('hello    \n', 'hello   =20\n')

    def test_encode_one_word_trailing_tab(self):
        self._test_encode('hello \t', 'hello =09')

    def test_encode_one_line_trailing_tab(self):
        self._test_encode('hello \t\n', 'hello =09\n')

    def test_encode_trailing_space_before_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6)

    def test_encode_trailing_space_at_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5)

    def test_encode_trailing_space_beyond_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4)

    def test_encode_whitespace_lines(self):
        self._test_encode(' \n' * 5, '=20\n' * 5)

    def test_encode_quoted_equals(self):
        self._test_encode('a = b', 'a =3D b')

    def test_encode_one_long_string(self):
        self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25)

    def test_encode_one_long_line(self):
        self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')

    def test_encode_one_very_long_line(self):
        self._test_encode('x' * 200 + '\n',
                2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n')

    def test_encode_shortest_maxlinelen(self):
        self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4)

    def test_encode_maxlinelen_too_small(self):
        self.assertRaises(ValueError, self._test_encode, '', '', maxlinelen=3)

    def test_encode(self):
        eq = self.assertEqual
        eq(quoprimime.body_encode(''), '')
        eq(quoprimime.body_encode('hello'), 'hello')
        # CRLF in the input comes out as the default eol
        eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
        # Test the maxlinelen arg
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
x xxxx xxxx xxxx xxxx=20""")
        # Test the eol argument
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
x xxxx xxxx xxxx xxxx=20""")
        eq(quoprimime.body_encode("""\
one line

two line"""), """\
one line

two line""")
4250
4251
Ezio Melottib3aedd42010-11-20 19:04:17 +00004252
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004253# Test the Charset class
class TestCharset(unittest.TestCase):
    def tearDown(self):
        # test_body_encode registers a 'fake' charset; remove it again.
        # It is fine if the entry is not there.
        from email import charset as CharsetModule
        CharsetModule.CHARSETS.pop('fake', None)

    def test_codec_encodeable(self):
        # Make sure us-ascii = no Unicode conversion
        ascii_cs = Charset('us-ascii')
        self.assertEqual(ascii_cs.header_encode('Hello World!'),
                         'Hello World!')
        # Test 8-bit idempotency with us-ascii
        eight_bit = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
        with self.assertRaises(UnicodeError):
            ascii_cs.header_encode(eight_bit)
        utf8_cs = Charset('utf-8')
        self.assertEqual(utf8_cs.header_encode(eight_bit),
                         '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')

    def test_body_encode(self):
        check = self.assertEqual
        # Try a charset with QP body encoding
        check('hello w=F6rld',
              Charset('iso-8859-1').body_encode('hello w\xf6rld'))
        # Try a charset with Base64 body encoding
        check('aGVsbG8gd29ybGQ=\n',
              Charset('utf-8').body_encode(b'hello world'))
        # Try a charset with None body encoding
        check('hello world', Charset('us-ascii').body_encode('hello world'))
        # Try the convert argument, where input codec != output codec
        c = Charset('euc-jp')
        # With apologies to Tokio Kikuchi ;)
        # XXX FIXME
##         try:
##             eq('\x1b$B5FCO;~IW\x1b(B',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
##             eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
##         except LookupError:
##             # We probably don't have the Japanese codecs installed
##             pass
        # Testing SF bug #625509, which we have to fake, since there are no
        # built-in encodings where the header encoding is QP but the body
        # encoding is not.
        from email import charset as CharsetModule
        CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8')
        fake_cs = Charset('fake')
        check('hello world', fake_cs.body_encode('hello world'))

    def test_unicode_charset_name(self):
        ascii_cs = Charset('us-ascii')
        self.assertEqual(str(ascii_cs), 'us-ascii')
        with self.assertRaises(errors.CharsetError):
            Charset('asc\xffii')
4308
4309
Ezio Melottib3aedd42010-11-20 19:04:17 +00004310
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004311# Test multilingual MIME headers.
4312class TestHeader(TestEmailBase):
4313 def test_simple(self):
4314 eq = self.ndiffAssertEqual
4315 h = Header('Hello World!')
4316 eq(h.encode(), 'Hello World!')
4317 h.append(' Goodbye World!')
4318 eq(h.encode(), 'Hello World! Goodbye World!')
4319
4320 def test_simple_surprise(self):
4321 eq = self.ndiffAssertEqual
4322 h = Header('Hello World!')
4323 eq(h.encode(), 'Hello World!')
4324 h.append('Goodbye World!')
4325 eq(h.encode(), 'Hello World! Goodbye World!')
4326
4327 def test_header_needs_no_decoding(self):
4328 h = 'no decoding needed'
4329 self.assertEqual(decode_header(h), [(h, None)])
4330
4331 def test_long(self):
4332 h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
4333 maxlinelen=76)
4334 for l in h.encode(splitchars=' ').split('\n '):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00004335 self.assertTrue(len(l) <= 76)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004336
4337 def test_multilingual(self):
4338 eq = self.ndiffAssertEqual
4339 g = Charset("iso-8859-1")
4340 cz = Charset("iso-8859-2")
4341 utf8 = Charset("utf-8")
4342 g_head = (b'Die Mieter treten hier ein werden mit einem '
4343 b'Foerderband komfortabel den Korridor entlang, '
4344 b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
4345 b'gegen die rotierenden Klingen bef\xf6rdert. ')
4346 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
4347 b'd\xf9vtipu.. ')
4348 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
4349 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
4350 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
4351 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
4352 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
4353 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
4354 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
4355 '\u3044\u307e\u3059\u3002')
4356 h = Header(g_head, g)
4357 h.append(cz_head, cz)
4358 h.append(utf8_head, utf8)
Guido van Rossum9604e662007-08-30 03:46:43 +00004359 enc = h.encode(maxlinelen=76)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004360 eq(enc, """\
Guido van Rossum9604e662007-08-30 03:46:43 +00004361=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
4362 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
4363 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
4364 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004365 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
4366 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
4367 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
4368 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
Guido van Rossum9604e662007-08-30 03:46:43 +00004369 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
4370 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
4371 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
4372 decoded = decode_header(enc)
4373 eq(len(decoded), 3)
4374 eq(decoded[0], (g_head, 'iso-8859-1'))
4375 eq(decoded[1], (cz_head, 'iso-8859-2'))
4376 eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004377 ustr = str(h)
Guido van Rossum9604e662007-08-30 03:46:43 +00004378 eq(ustr,
4379 (b'Die Mieter treten hier ein werden mit einem Foerderband '
4380 b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
4381 b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
4382 b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
4383 b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
4384 b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
4385 b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
4386 b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
4387 b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
4388 b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
4389 b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
4390 b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
4391 b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
4392 b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
4393 b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
4394 b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
4395 ).decode('utf-8'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004396 # Test make_header()
4397 newh = make_header(decode_header(enc))
Guido van Rossum9604e662007-08-30 03:46:43 +00004398 eq(newh, h)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004399
4400 def test_empty_header_encode(self):
4401 h = Header()
4402 self.assertEqual(h.encode(), '')
Barry Warsaw8b3d6592007-08-30 02:10:49 +00004403
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004404 def test_header_ctor_default_args(self):
4405 eq = self.ndiffAssertEqual
4406 h = Header()
4407 eq(h, '')
4408 h.append('foo', Charset('iso-8859-1'))
Guido van Rossum9604e662007-08-30 03:46:43 +00004409 eq(h, 'foo')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004410
4411 def test_explicit_maxlinelen(self):
4412 eq = self.ndiffAssertEqual
4413 hstr = ('A very long line that must get split to something other '
4414 'than at the 76th character boundary to test the non-default '
4415 'behavior')
4416 h = Header(hstr)
4417 eq(h.encode(), '''\
4418A very long line that must get split to something other than at the 76th
4419 character boundary to test the non-default behavior''')
4420 eq(str(h), hstr)
4421 h = Header(hstr, header_name='Subject')
4422 eq(h.encode(), '''\
4423A very long line that must get split to something other than at the
4424 76th character boundary to test the non-default behavior''')
4425 eq(str(h), hstr)
4426 h = Header(hstr, maxlinelen=1024, header_name='Subject')
4427 eq(h.encode(), hstr)
4428 eq(str(h), hstr)
4429
def test_quopri_splittable(self):
    # Checks the exact folding produced for an iso-8859-1 header at
    # maxlinelen 20 and 40, and that decoding the folded text gives back
    # the original string.
    text = 'xxxx ' * 20
    narrow = Header(charset='iso-8859-1', maxlinelen=20)
    narrow.append(text)
    folded = narrow.encode()
    self.ndiffAssertEqual(folded, """\
=?iso-8859-1?q?xxx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_?=""")
    self.ndiffAssertEqual(text, str(make_header(decode_header(folded))))
    # Same text again with a wider line limit.
    wide = Header(charset='iso-8859-1', maxlinelen=40)
    wide.append('xxxx ' * 20)
    folded = wide.encode()
    self.ndiffAssertEqual(folded, """\
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
 =?iso-8859-1?q?_xxxx_xxxx_?=""")
    self.ndiffAssertEqual(text, str(make_header(decode_header(folded))))
4498
def test_base64_splittable(self):
    # Checks the exact folding produced for a koi8-r header at
    # maxlinelen 20 and 40, and that decoding the folded text gives back
    # the original string.
    text = 'xxxx ' * 20
    narrow = Header(charset='koi8-r', maxlinelen=20)
    narrow.append(text)
    folded = narrow.encode()
    self.ndiffAssertEqual(folded, """\
=?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IA==?=""")
    self.ndiffAssertEqual(text, str(make_header(decode_header(folded))))
    # Same text again with a wider line limit.
    wide = Header(charset='koi8-r', maxlinelen=40)
    wide.append(text)
    folded = wide.encode()
    self.ndiffAssertEqual(folded, """\
=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
    self.ndiffAssertEqual(text, str(make_header(decode_header(folded))))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004552
4553 def test_us_ascii_header(self):
4554 eq = self.assertEqual
4555 s = 'hello'
4556 x = decode_header(s)
4557 eq(x, [('hello', None)])
4558 h = make_header(x)
4559 eq(s, h.encode())
4560
4561 def test_string_charset(self):
4562 eq = self.assertEqual
4563 h = Header()
4564 h.append('hello', 'iso-8859-1')
Guido van Rossum9604e662007-08-30 03:46:43 +00004565 eq(h, 'hello')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004566
4567## def test_unicode_error(self):
4568## raises = self.assertRaises
4569## raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
4570## raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
4571## h = Header()
4572## raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
4573## raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
4574## raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')
4575
4576 def test_utf8_shortest(self):
4577 eq = self.assertEqual
4578 h = Header('p\xf6stal', 'utf-8')
4579 eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
4580 h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
4581 eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')
4582
4583 def test_bad_8bit_header(self):
4584 raises = self.assertRaises
4585 eq = self.assertEqual
4586 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4587 raises(UnicodeError, Header, x)
4588 h = Header()
4589 raises(UnicodeError, h.append, x)
4590 e = x.decode('utf-8', 'replace')
4591 eq(str(Header(x, errors='replace')), e)
4592 h.append(x, errors='replace')
4593 eq(str(h), e)
4594
R David Murray041015c2011-03-25 15:10:55 -04004595 def test_escaped_8bit_header(self):
4596 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
R David Murray6bdb1762011-06-18 12:30:55 -04004597 e = x.decode('ascii', 'surrogateescape')
4598 h = Header(e, charset=email.charset.UNKNOWN8BIT)
R David Murray041015c2011-03-25 15:10:55 -04004599 self.assertEqual(str(h),
4600 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
4601 self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])
4602
R David Murraye5e366c2011-06-18 12:57:28 -04004603 def test_header_handles_binary_unknown8bit(self):
4604 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4605 h = Header(x, charset=email.charset.UNKNOWN8BIT)
4606 self.assertEqual(str(h),
4607 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
4608 self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])
4609
4610 def test_make_header_handles_binary_unknown8bit(self):
4611 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4612 h = Header(x, charset=email.charset.UNKNOWN8BIT)
4613 h2 = email.header.make_header(email.header.decode_header(h))
4614 self.assertEqual(str(h2),
4615 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
4616 self.assertEqual(email.header.decode_header(h2), [(x, 'unknown-8bit')])
4617
R David Murray041015c2011-03-25 15:10:55 -04004618 def test_modify_returned_list_does_not_change_header(self):
4619 h = Header('test')
4620 chunks = email.header.decode_header(h)
4621 chunks.append(('ascii', 'test2'))
4622 self.assertEqual(str(h), 'test')
4623
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004624 def test_encoded_adjacent_nonencoded(self):
4625 eq = self.assertEqual
4626 h = Header()
4627 h.append('hello', 'iso-8859-1')
4628 h.append('world')
4629 s = h.encode()
4630 eq(s, '=?iso-8859-1?q?hello?= world')
4631 h = make_header(decode_header(s))
4632 eq(h.encode(), s)
4633
R David Murray07ea53c2012-06-02 17:56:49 -04004634 def test_whitespace_keeper(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004635 eq = self.assertEqual
4636 s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
4637 parts = decode_header(s)
R David Murray07ea53c2012-06-02 17:56:49 -04004638 eq(parts, [(b'Subject: ', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b' zz.', None)])
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004639 hdr = make_header(parts)
4640 eq(hdr.encode(),
4641 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')
4642
4643 def test_broken_base64_header(self):
4644 raises = self.assertRaises
R. David Murrayc4e69cc2010-08-03 22:14:10 +00004645 s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004646 raises(errors.HeaderParseError, decode_header, s)
4647
R. David Murray477efb32011-01-05 01:39:32 +00004648 def test_shift_jis_charset(self):
4649 h = Header('文', charset='shift_jis')
4650 self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')
4651
R David Murrayde912762011-03-16 18:26:23 -04004652 def test_flatten_header_with_no_value(self):
4653 # Issue 11401 (regression from email 4.x) Note that the space after
4654 # the header doesn't reflect the input, but this is also the way
4655 # email 4.x behaved. At some point it would be nice to fix that.
4656 msg = email.message_from_string("EmptyHeader:")
4657 self.assertEqual(str(msg), "EmptyHeader: \n\n")
4658
R David Murray01581ee2011-04-18 10:04:34 -04004659 def test_encode_preserves_leading_ws_on_value(self):
4660 msg = Message()
4661 msg['SomeHeader'] = ' value with leading ws'
4662 self.assertEqual(str(msg), "SomeHeader: value with leading ws\n\n")
4663
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004664
Ezio Melottib3aedd42010-11-20 19:04:17 +00004665
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004666# Test RFC 2231 header parameters (en/de)coding
class TestRFC2231(TestEmailBase):
    # Each test parses (or builds) a message whose header parameters use
    # the RFC 2231 extended syntax and checks the values seen through the
    # Message accessors.

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes
    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes
    def test_get_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_29.txt')
        eq(msg.get_param('title'),
           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        eq(msg.get_param('title', unquote=False),
           ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))

    def test_set_param(self):
        eq = self.ndiffAssertEqual
        msg = Message()
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii')
        eq(msg.get_param('title'),
           ('us-ascii', '', 'This is even more ***fun*** isn\'t it!'))
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        eq(msg.get_param('title'),
           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        msg = self._msgobj('msg_01.txt')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    def test_set_param_requote(self):
        msg = Message()
        msg.set_param('title', 'foo')
        self.assertEqual(msg['content-type'], 'text/plain; title="foo"')
        msg.set_param('title', 'bar', requote=False)
        self.assertEqual(msg['content-type'], 'text/plain; title=bar')
        # tspecial is still quoted.
        msg.set_param('title', "(bar)bell", requote=False)
        self.assertEqual(msg['content-type'], 'text/plain; title="(bar)bell"')

    def test_del_param(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        msg.set_param('foo', 'bar', charset='us-ascii', language='en')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        msg.del_param('foo', header='Content-Type')
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_charset
    # I changed the charset name, though, because the one in the file isn't
    # a legal charset name.  Should add a test for an illegal charset.
    def test_rfc2231_get_content_charset(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_32.txt')
        eq(msg.get_content_charset(), 'us-ascii')

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_double_quotes
    def test_rfc2231_parse_rfc_quoting(self):
        m = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*=''This%20is%20even%20more%20;
            \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        # Flattening the parsed message must reproduce the input exactly.
        self.assertEqual(m, msg.as_string())

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes
    def test_rfc2231_parse_extra_quoting(self):
        m = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*="''This%20is%20even%20more%20";
            \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        # Flattening the parsed message must reproduce the input exactly.
        self.assertEqual(m, msg.as_string())

    # test_headerregistry.TestContentTypeHeader.rfc2231_no_language_or_charset
    # but new test uses *0* because otherwise lang/charset is not valid.
    # test_headerregistry.TestContentTypeHeader.rfc2231_segmented_normal_values
    def test_rfc2231_no_language_or_charset(self):
        m = '''\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm

'''
        msg = email.message_from_string(m)
        param = msg.get_param('NAME')
        # No segment is marked as encoded, so a plain string (not a
        # (charset, language, value) tuple) is expected.
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(
            param,
            'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_charset
    def test_rfc2231_no_language_or_charset_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    # Duplicate of previous test?
    def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    # test_headerregistry.TestContentTypeHeader.rfc2231_partly_encoded,
    # but the test below is wrong (the first part should be decoded).
    def test_rfc2231_partly_encoded(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20***fun*** is it not.pdf')

    def test_rfc2231_partly_nonencoded(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0="This%20is%20even%20more%20";
\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_boundary(self):
        m = '''\
Content-Type: multipart/alternative;
\tboundary*0*="''This%20is%20even%20more%20";
\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tboundary*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_boundary(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_charset(self):
        # This is a nonsensical charset value, but tests the code anyway
        m = '''\
Content-Type: text/plain;
\tcharset*0*="This%20is%20even%20more%20";
\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tcharset*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_content_charset(),
                         'this is even more ***fun*** is it not.pdf')

    # test_headerregistry.TestContentTypeHeader.rfc2231_unknown_charset_treated_as_ascii
    def test_rfc2231_bad_encoding_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_charset(self):
        m = """\
Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D

"""
        msg = email.message_from_string(m)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertIsNone(msg.get_content_charset())

    def test_rfc2231_bad_character_in_charset(self):
        m = """\
Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D

"""
        msg = email.message_from_string(m)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertIsNone(msg.get_content_charset())

    def test_rfc2231_bad_character_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2*="is it not.pdf%E2"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf\ufffd')

    def test_rfc2231_unknown_encoding(self):
        m = """\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt

"""
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(), 'myfile.txt')

    def test_rfc2231_single_tick_in_filename_extended(self):
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        self.assertIsNone(charset)
        self.assertIsNone(language)
        self.assertEqual(s, "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes
    def test_rfc2231_single_tick_in_filename(self):
        m = """\
Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(m)
        param = msg.get_param('name')
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(param, "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_value_with_charset_and_lang
    def test_rfc2231_tick_attack_extended(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_non_encoded_value
    def test_rfc2231_tick_attack(self):
        m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(m)
        param = msg.get_param('name')
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(param, "us-ascii'en-us'Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quotes_inside_quotes
    def test_rfc2231_no_extended_values(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo; name=\"Frank's Document\"

"""
        msg = email.message_from_string(m)
        eq(msg.get_param('name'), "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_then_unencoded_segments
    def test_rfc2231_encoded_then_unencoded_segments(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'My\";
\tname*1=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, 'My Document For You')

    # test_headerregistry.TestContentTypeHeader.rfc2231_unencoded_then_encoded_segments
    # test_headerregistry.TestContentTypeHeader.rfc2231_quoted_unencoded_then_encoded_segments
    def test_rfc2231_unencoded_then_encoded_segments(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'My\";
\tname*1*=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, 'My Document For You')
5034
5035
Ezio Melottib3aedd42010-11-20 19:04:17 +00005036
R. David Murraya8f480f2010-01-16 18:30:03 +00005037# Tests to ensure that signed parts of an email are completely preserved, as
5038# required by RFC1847 section 2.1. Note that these are incomplete, because the
5039# email package does not currently always preserve the body. See issue 1670765.
class TestSigned(TestEmailBase):

    # Matches the first MIME part of a message: group 1 is the boundary
    # string taken from a line starting with "--", group 2 is everything
    # up to the next line consisting of "--" plus that same boundary.
    _first_part_re = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)

    def _msg_and_obj(self, filename):
        # Return the raw text of the test file together with the message
        # object parsed from that text.
        with openfile(filename) as fp:
            original = fp.read()
        msg = email.message_from_string(original)
        return original, msg

    def _first_part(self, text):
        # Return the body of the first MIME part found in text; a text
        # without any part is reported as a test failure rather than as
        # an AttributeError on a missing match object.
        match = self._first_part_re.search(text)
        self.assertIsNotNone(match, 'no MIME part found in message text')
        return match.group(2)

    def _signed_parts_eq(self, original, result):
        # Extract the first mime part of each message
        inpart = self._first_part(original)
        outpart = self._first_part(result)
        self.assertEqual(outpart, inpart)

    def test_long_headers_as_string(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)
5072
5073
Ezio Melottib3aedd42010-11-20 19:04:17 +00005074
# Run every test in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()