blob: 003df96f0ab1d2bb62428c22709c6ecf189c36c6 [file] [log] [blame]
Benjamin Peterson46a99002010-01-09 18:45:30 +00001# Copyright (C) 2001-2010 Python Software Foundation
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002# Contact: email-sig@python.org
3# email package unit tests
4
5import os
R. David Murray719a4492010-11-21 16:53:48 +00006import re
Guido van Rossum8b3febe2007-08-30 01:15:14 +00007import sys
8import time
9import base64
10import difflib
11import unittest
12import warnings
R. David Murray96fd54e2010-10-08 15:55:28 +000013import textwrap
Guido van Rossum8b3febe2007-08-30 01:15:14 +000014
R. David Murray96fd54e2010-10-08 15:55:28 +000015from io import StringIO, BytesIO
Guido van Rossum8b3febe2007-08-30 01:15:14 +000016from itertools import chain
17
18import email
R David Murrayc27e5222012-05-25 15:01:48 -040019import email.policy
Guido van Rossum8b3febe2007-08-30 01:15:14 +000020
21from email.charset import Charset
22from email.header import Header, decode_header, make_header
23from email.parser import Parser, HeaderParser
24from email.generator import Generator, DecodedGenerator
25from email.message import Message
26from email.mime.application import MIMEApplication
27from email.mime.audio import MIMEAudio
28from email.mime.text import MIMEText
29from email.mime.image import MIMEImage
30from email.mime.base import MIMEBase
31from email.mime.message import MIMEMessage
32from email.mime.multipart import MIMEMultipart
33from email import utils
34from email import errors
35from email import encoders
36from email import iterators
37from email import base64mime
38from email import quoprimime
39
R David Murray28346b82011-03-31 11:40:20 -040040from test.support import run_unittest, unlink
R David Murraya256bac2011-03-31 12:20:23 -040041from test.test_email import openfile, TestEmailBase
Guido van Rossum8b3febe2007-08-30 01:15:14 +000042
43NL = '\n'
44EMPTYSTRING = ''
45SPACE = ' '
46
47
Guido van Rossum8b3febe2007-08-30 01:15:14 +000048# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    """Exercise the public API of email.message.Message.

    Fixture messages (``msg_NN.txt``) are obtained through ``self._msgobj()``
    and ``openfile()``.  Individual tests are described with ``#`` comments
    rather than docstrings so that unittest's verbose output keeps showing
    the test method names.
    """

    def test_get_all(self):
        # get_all() returns every value of a repeated header, or the
        # supplied fallback object when the header is absent.
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        self.assertIsNone(msg.get_charset())
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        self.assertIsNone(msg.get_charset())
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        # set_charset() also accepts a plain charset name.
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_get_charsets(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        # An explicit fallback replaces None for parts without a charset.
        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_make_boundary(self):
        msg = MIMEMultipart('form-data')
        # Note that when the boundary gets created is an implementation
        # detail and might change.
        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
        # Trigger creation of boundary
        msg.as_string()
        self.assertEqual(msg.items()[0][1][:33],
                         'multipart/form-data; boundary="==')
        # XXX: there ought to be tests of the uniqueness of the boundary, too.

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        with openfile('msg_46.txt') as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, mangle_from_=True, maxheaderlen=0)
        gen.flatten(msg, unixfrom=False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_byte_message_rfc822_only(self):
        # Make sure new bytes header parser also passes this.
        with openfile('msg_46.txt', 'rb') as fp:
            msgdata = fp.read()
        parser = email.parser.BytesHeaderParser()
        msg = parser.parsebytes(msgdata)
        out = BytesIO()
        gen = email.generator.BytesGenerator(out)
        gen.flatten(msg)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        self.assertIsNone(msg.get_payload(decode=True))
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        # Every accepted spelling of the uuencode CTE must decode.
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_get_payload_n_raises_on_non_multipart(self):
        msg = Message()
        self.assertRaises(TypeError, msg.get_payload, 1)

    def test_decoded_generator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        for name in ('from', 'From', 'FROM', 'to', 'To', 'TO'):
            self.assertIn(name, msg)

    def test_as_string(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        eq(text, str(msg))
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        # With unixfrom=True the first line is the envelope "From " line;
        # everything after it must equal the plain rendering.
        self.assertTrue(lines[0].startswith('From '))
        eq(text, NL.join(lines[1:]))

    # test_headerregistry.TestContentTypeHeader.bad_params
    def test_bad_param(self):
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_filename())

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_boundary())

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        # No Content-Type header is present, so the default lookup fails.
        self.assertIsNone(msg.get_params())
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    # test_headerregistry.TestContentTypeHeader.spaces_around_param_equals
    def test_get_param_liberal(self):
        msg = Message()
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        self.assertIsNone(msg.get_param('quuz', header='x-header'))
        self.assertIsNone(msg.get_param('quuz'))
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    # test_headerregistry.TestContentTypeHeader.spaces_around_semis
    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    # test_headerregistry.TestContentTypeHeader.semis_inside_quotes
    def test_get_param_with_semis_in_quotes(self):
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    # test_headerregistry.TestContentTypeHeader.quotes_inside_rfc2231_value
    def test_get_param_with_quotes(self):
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        msg = email.message_from_string('Header: exists')
        # Header lookup via "in" is case-insensitive but matches whole names.
        self.assertIn('header', msg)
        self.assertIn('Header', msg)
        self.assertIn('HEADER', msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        # Re-adding the parameter appends it after the existing ones.
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_del_param_on_nonexistent_header(self):
        msg = Message()
        # Must not raise, and must not create the header as a side effect.
        msg.del_param('filename', 'content-disposition')
        self.assertNotIn('content-disposition', msg)

    def test_del_nonexistent_param(self):
        msg = Message()
        msg.add_header('Content-Type', 'text/plain', charset='utf-8')
        existing_header = msg['Content-Type']
        msg.del_param('foobar', header='Content-Type')
        # Deleting an absent parameter must leave the header untouched.
        self.assertEqual(msg['Content-Type'], existing_header)
        self.assertEqual(msg['Content-Type'], 'text/plain; charset="utf-8"')

    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        # A type without a "/" is rejected.
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # Changing the type keeps the existing parameters.
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        # A malformed Content-Type falls back to the text/plain default.
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        # A malformed Content-Type falls back to the text/plain default.
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        # With a duplicated header only the first occurrence is replaced.
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    # test_defect_handling:test_invalid_chars_in_base64_payload
    def test_broken_base64_payload(self):
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         (b'\x03\x00\xe9\xd0\xfe\xff\xff.\x8b\xc0'
                          b'\xa1\x00p\xf6\xbf\xe9\x0f'))
        self.assertIsInstance(msg.defects[0],
                              errors.InvalidBase64CharactersDefect)

    def test_broken_unicode_payload(self):
        # This test improves coverage but is not a compliance test.
        # The behavior in this situation is currently undefined by the API.
        x = 'this is a br\xf6ken thing to do'
        msg = Message()
        msg['content-type'] = 'text/plain'
        msg['content-transfer-encoding'] = '8bit'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    def test_questionable_bytes_payload(self):
        # This test improves coverage but is not a compliance test,
        # since it involves poking inside the black box.
        x = 'this is a quéstionable thing to do'.encode('utf-8')
        msg = Message()
        msg['content-type'] = 'text/plain; charset="utf-8"'
        msg['content-transfer-encoding'] = '8bit'
        msg._payload = x
        self.assertEqual(msg.get_payload(decode=True), x)

    # Issue 1078919
    def test_ascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    def test_noascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer.ppt")
        self.assertEqual(
            'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
            msg['Content-Disposition'])

    def test_nonascii_add_header_via_triple(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fußballer.ppt'))
        self.assertEqual(
            'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
            msg['Content-Disposition'])

    def test_ascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="windows [filename].ppt")
        self.assertEqual(
            'attachment; filename="windows [filename].ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer [filename].ppt")
        self.assertEqual(
            "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
            msg['Content-Disposition'])

    def test_add_header_with_name_only_param(self):
        # A None value yields a bare parameter name; underscores in the
        # keyword become dashes.
        msg = Message()
        msg.add_header('Content-Disposition', 'inline', foo_bar=None)
        self.assertEqual("inline; foo-bar", msg['Content-Disposition'])

    def test_add_header_with_no_value(self):
        msg = Message()
        msg.add_header('X-Status', None)
        self.assertEqual('', msg['X-Status'])

    # Issue 5871: reject an attempt to embed a header inside a header value
    # (header injection attack).
    def test_embeded_header_via_Header_rejected(self):
        msg = Message()
        msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_embeded_header_via_string_rejected(self):
        msg = Message()
        msg['Dummy'] = 'dummy\nX-Injected-Header: test'
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_unicode_header_defaults_to_utf8_encoding(self):
        # Issue 14291
        m = MIMEText('abc\n')
        m['Subject'] = 'É test'
        self.assertEqual(str(m), textwrap.dedent("""\
            Content-Type: text/plain; charset="us-ascii"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 7bit
            Subject: =?utf-8?q?=C3=89_test?=

            abc
            """))

    def test_unicode_body_defaults_to_utf8_encoding(self):
        # Issue 14291
        m = MIMEText('É testabc\n')
        self.assertEqual(str(m), textwrap.dedent("""\
            Content-Type: text/plain; charset="utf-8"
            MIME-Version: 1.0
            Content-Transfer-Encoding: base64

            w4kgdGVzdGFiYwo=
            """))
640
641
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000642# Test the email.encoders module
643class TestEncoders(unittest.TestCase):
R David Murray6d94bd42011-03-16 15:52:22 -0400644
645 def test_EncodersEncode_base64(self):
646 with openfile('PyBanner048.gif', 'rb') as fp:
647 bindata = fp.read()
648 mimed = email.mime.image.MIMEImage(bindata)
649 base64ed = mimed.get_payload()
650 # the transfer-encoded body lines should all be <=76 characters
651 lines = base64ed.split('\n')
652 self.assertLessEqual(max([ len(x) for x in lines ]), 76)
653
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000654 def test_encode_empty_payload(self):
655 eq = self.assertEqual
656 msg = Message()
657 msg.set_charset('us-ascii')
658 eq(msg['content-transfer-encoding'], '7bit')
659
660 def test_default_cte(self):
661 eq = self.assertEqual
Ezio Melottic303c122010-04-22 11:57:12 +0000662 # 7bit data and the default us-ascii _charset
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000663 msg = MIMEText('hello world')
664 eq(msg['content-transfer-encoding'], '7bit')
Ezio Melottic303c122010-04-22 11:57:12 +0000665 # Similar, but with 8bit data
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000666 msg = MIMEText('hello \xf8 world')
R David Murray8680bcc2012-03-22 22:17:51 -0400667 eq(msg['content-transfer-encoding'], 'base64')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000668 # And now with a different charset
669 msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
670 eq(msg['content-transfer-encoding'], 'quoted-printable')
671
R. David Murraye85200d2010-05-06 01:41:14 +0000672 def test_encode7or8bit(self):
673 # Make sure a charset whose input character set is 8bit but
674 # whose output character set is 7bit gets a transfer-encoding
675 # of 7bit.
676 eq = self.assertEqual
R. David Murray850fc852010-06-03 01:58:28 +0000677 msg = MIMEText('文', _charset='euc-jp')
R. David Murraye85200d2010-05-06 01:41:14 +0000678 eq(msg['content-transfer-encoding'], '7bit')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000679
Ezio Melottib3aedd42010-11-20 19:04:17 +0000680
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000681# Test long header wrapping
682class TestLongHeaders(TestEmailBase):
R David Murray01581ee2011-04-18 10:04:34 -0400683
684 maxDiff = None
685
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000686 def test_split_long_continuation(self):
687 eq = self.ndiffAssertEqual
688 msg = email.message_from_string("""\
689Subject: bug demonstration
690\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
691\tmore text
692
693test
694""")
695 sfp = StringIO()
696 g = Generator(sfp)
697 g.flatten(msg)
698 eq(sfp.getvalue(), """\
699Subject: bug demonstration
700\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
701\tmore text
702
703test
704""")
705
706 def test_another_long_almost_unsplittable_header(self):
707 eq = self.ndiffAssertEqual
708 hstr = """\
709bug demonstration
710\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
711\tmore text"""
712 h = Header(hstr, continuation_ws='\t')
713 eq(h.encode(), """\
714bug demonstration
715\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
716\tmore text""")
717 h = Header(hstr.replace('\t', ' '))
718 eq(h.encode(), """\
719bug demonstration
720 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
721 more text""")
722
723 def test_long_nonstring(self):
724 eq = self.ndiffAssertEqual
725 g = Charset("iso-8859-1")
726 cz = Charset("iso-8859-2")
727 utf8 = Charset("utf-8")
728 g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
729 b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
730 b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
731 b'bef\xf6rdert. ')
732 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
733 b'd\xf9vtipu.. ')
734 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
735 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
736 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
737 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
738 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
739 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
740 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
741 '\u3044\u307e\u3059\u3002')
742 h = Header(g_head, g, header_name='Subject')
743 h.append(cz_head, cz)
744 h.append(utf8_head, utf8)
745 msg = Message()
746 msg['Subject'] = h
747 sfp = StringIO()
748 g = Generator(sfp)
749 g.flatten(msg)
750 eq(sfp.getvalue(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000751Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
752 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
753 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
754 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
755 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
756 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
757 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
758 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
759 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
760 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
761 =?utf-8?b?44CC?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000762
763""")
Guido van Rossum9604e662007-08-30 03:46:43 +0000764 eq(h.encode(maxlinelen=76), """\
765=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
766 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
767 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
768 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
769 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
770 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
771 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
772 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
773 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
774 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
775 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000776
777 def test_long_header_encode(self):
778 eq = self.ndiffAssertEqual
779 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
780 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
781 header_name='X-Foobar-Spoink-Defrobnit')
782 eq(h.encode(), '''\
783wasnipoop; giraffes="very-long-necked-animals";
784 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
785
786 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
787 eq = self.ndiffAssertEqual
788 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
789 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
790 header_name='X-Foobar-Spoink-Defrobnit',
791 continuation_ws='\t')
792 eq(h.encode(), '''\
793wasnipoop; giraffes="very-long-necked-animals";
794 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
795
796 def test_long_header_encode_with_tab_continuation(self):
797 eq = self.ndiffAssertEqual
798 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
799 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
800 header_name='X-Foobar-Spoink-Defrobnit',
801 continuation_ws='\t')
802 eq(h.encode(), '''\
803wasnipoop; giraffes="very-long-necked-animals";
804\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
805
R David Murray3a6152f2011-03-14 21:13:03 -0400806 def test_header_encode_with_different_output_charset(self):
807 h = Header('文', 'euc-jp')
808 self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=")
809
810 def test_long_header_encode_with_different_output_charset(self):
811 h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4'
812 b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4'
813 b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4'
814 b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp')
815 res = """\
816=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?=
817 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?="""
818 self.assertEqual(h.encode(), res)
819
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000820 def test_header_splitter(self):
821 eq = self.ndiffAssertEqual
822 msg = MIMEText('')
823 # It'd be great if we could use add_header() here, but that doesn't
824 # guarantee an order of the parameters.
825 msg['X-Foobar-Spoink-Defrobnit'] = (
826 'wasnipoop; giraffes="very-long-necked-animals"; '
827 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
828 sfp = StringIO()
829 g = Generator(sfp)
830 g.flatten(msg)
831 eq(sfp.getvalue(), '''\
832Content-Type: text/plain; charset="us-ascii"
833MIME-Version: 1.0
834Content-Transfer-Encoding: 7bit
835X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
836 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
837
838''')
839
def test_no_semis_header_splitter(self):
    # A References value with no semicolons or commas: the expected fold
    # falls on the whitespace between message ids.
    msg = Message()
    msg['From'] = 'test@dom.ain'
    message_ids = ['<%d@dom.ain>' % i for i in range(10)]
    msg['References'] = SPACE.join(message_ids)
    msg.set_payload('Test')
    out = StringIO()
    Generator(out).flatten(msg)
    expected = """\
From: test@dom.ain
References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>

Test"""
    self.ndiffAssertEqual(out.getvalue(), expected)
855
R David Murray7da4db12011-04-07 20:37:17 -0400856 def test_last_split_chunk_does_not_fit(self):
857 eq = self.ndiffAssertEqual
858 h = Header('Subject: the first part of this is short, but_the_second'
859 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
860 '_all_by_itself')
861 eq(h.encode(), """\
862Subject: the first part of this is short,
863 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
864
865 def test_splittable_leading_char_followed_by_overlong_unsplitable(self):
866 eq = self.ndiffAssertEqual
867 h = Header(', but_the_second'
868 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
869 '_all_by_itself')
870 eq(h.encode(), """\
871,
872 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
873
874 def test_multiple_splittable_leading_char_followed_by_overlong_unsplitable(self):
875 eq = self.ndiffAssertEqual
876 h = Header(', , but_the_second'
877 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
878 '_all_by_itself')
879 eq(h.encode(), """\
880, ,
881 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
882
883 def test_trailing_splitable_on_overlong_unsplitable(self):
884 eq = self.ndiffAssertEqual
885 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
886 'be_on_a_line_all_by_itself;')
887 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_should_"
888 "be_on_a_line_all_by_itself;")
889
890 def test_trailing_splitable_on_overlong_unsplitable_with_leading_splitable(self):
891 eq = self.ndiffAssertEqual
892 h = Header('; '
893 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
R David Murray01581ee2011-04-18 10:04:34 -0400894 'be_on_a_line_all_by_itself; ')
R David Murray7da4db12011-04-07 20:37:17 -0400895 eq(h.encode(), """\
896;
R David Murray01581ee2011-04-18 10:04:34 -0400897 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
R David Murray7da4db12011-04-07 20:37:17 -0400898
R David Murraye1292a22011-04-07 20:54:03 -0400899 def test_long_header_with_multiple_sequential_split_chars(self):
R David Murraye1292a22011-04-07 20:54:03 -0400900 eq = self.ndiffAssertEqual
901 h = Header('This is a long line that has two whitespaces in a row. '
902 'This used to cause truncation of the header when folded')
903 eq(h.encode(), """\
904This is a long line that has two whitespaces in a row. This used to cause
905 truncation of the header when folded""")
906
R David Murray01581ee2011-04-18 10:04:34 -0400907 def test_splitter_split_on_punctuation_only_if_fws(self):
908 eq = self.ndiffAssertEqual
909 h = Header('thisverylongheaderhas;semicolons;and,commas,but'
910 'they;arenotlegal;fold,points')
911 eq(h.encode(), "thisverylongheaderhas;semicolons;and,commas,butthey;"
912 "arenotlegal;fold,points")
913
914 def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self):
915 eq = self.ndiffAssertEqual
916 h = Header('this is a test where we need to have more than one line '
917 'before; our final line that is just too big to fit;; '
918 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
919 'be_on_a_line_all_by_itself;')
920 eq(h.encode(), """\
921this is a test where we need to have more than one line before;
922 our final line that is just too big to fit;;
923 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""")
924
925 def test_overlong_last_part_followed_by_split_point(self):
926 eq = self.ndiffAssertEqual
927 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
928 'be_on_a_line_all_by_itself ')
929 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_"
930 "should_be_on_a_line_all_by_itself ")
931
932 def test_multiline_with_overlong_parts_separated_by_two_split_points(self):
933 eq = self.ndiffAssertEqual
934 h = Header('this_is_a__test_where_we_need_to_have_more_than_one_line_'
935 'before_our_final_line_; ; '
936 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
937 'be_on_a_line_all_by_itself; ')
938 eq(h.encode(), """\
939this_is_a__test_where_we_need_to_have_more_than_one_line_before_our_final_line_;
940 ;
941 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
942
943 def test_multiline_with_overlong_last_part_followed_by_split_point(self):
944 eq = self.ndiffAssertEqual
945 h = Header('this is a test where we need to have more than one line '
946 'before our final line; ; '
947 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
948 'be_on_a_line_all_by_itself; ')
949 eq(h.encode(), """\
950this is a test where we need to have more than one line before our final line;
951 ;
952 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
953
def test_long_header_with_whitespace_runs(self):
    # Runs of whitespace between message ids must be preserved when the
    # header is folded: the run at a fold point moves to the start of the
    # continuation line and the trailing run stays at the end.
    #
    # Each id carries two trailing spaces, so joining with SPACE gives
    # three-space runs; the expected text below uses the same runs so the
    # input and the expectation agree.
    # NOTE(review): run lengths are reconstructed from the 4/4/2 fold
    # layout and the trailing \x20\x20; confirm against upstream.
    eq = self.ndiffAssertEqual
    msg = Message()
    msg['From'] = 'test@dom.ain'
    msg['References'] = SPACE.join(['<foo@dom.ain>  '] * 10)
    msg.set_payload('Test')
    sfp = StringIO()
    g = Generator(sfp)
    g.flatten(msg)
    eq(sfp.getvalue(), """\
From: test@dom.ain
References: <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>
   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>
   <foo@dom.ain>   <foo@dom.ain>\x20\x20

Test""")
970
def test_long_run_with_semi_header_splitter(self):
    # A long run of message ids followed by '; abc': the expected output
    # folds on whitespace and keeps '; abc' attached to the last id.
    msg = Message()
    msg['From'] = 'test@dom.ain'
    ids = SPACE.join(['<foo@dom.ain>'] * 10)
    msg['References'] = ids + '; abc'
    msg.set_payload('Test')
    out = StringIO()
    Generator(out).flatten(msg)
    expected = """\
From: test@dom.ain
References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
 <foo@dom.ain>; abc

Test"""
    self.ndiffAssertEqual(out.getvalue(), expected)
987
988 def test_splitter_split_on_punctuation_only_if_fws(self):
989 eq = self.ndiffAssertEqual
990 msg = Message()
991 msg['From'] = 'test@dom.ain'
992 msg['References'] = ('thisverylongheaderhas;semicolons;and,commas,but'
993 'they;arenotlegal;fold,points')
994 msg.set_payload('Test')
995 sfp = StringIO()
996 g = Generator(sfp)
997 g.flatten(msg)
998 # XXX the space after the header should not be there.
999 eq(sfp.getvalue(), """\
1000From: test@dom.ain
1001References:\x20
1002 thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points
1003
1004Test""")
1005
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001006 def test_no_split_long_header(self):
1007 eq = self.ndiffAssertEqual
1008 hstr = 'References: ' + 'x' * 80
Guido van Rossum9604e662007-08-30 03:46:43 +00001009 h = Header(hstr)
1010 # These come on two lines because Headers are really field value
1011 # classes and don't really know about their field names.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001012 eq(h.encode(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +00001013References:
1014 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
1015 h = Header('x' * 80)
1016 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001017
1018 def test_splitting_multiple_long_lines(self):
1019 eq = self.ndiffAssertEqual
1020 hstr = """\
1021from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1022\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1023\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1024"""
1025 h = Header(hstr, continuation_ws='\t')
1026 eq(h.encode(), """\
1027from babylon.socal-raves.org (localhost [127.0.0.1]);
1028 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1029 for <mailman-admin@babylon.socal-raves.org>;
1030 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1031\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1032 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1033 for <mailman-admin@babylon.socal-raves.org>;
1034 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1035\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1036 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1037 for <mailman-admin@babylon.socal-raves.org>;
1038 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")
1039
1040 def test_splitting_first_line_only_is_long(self):
1041 eq = self.ndiffAssertEqual
1042 hstr = """\
1043from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
1044\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1045\tid 17k4h5-00034i-00
1046\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
1047 h = Header(hstr, maxlinelen=78, header_name='Received',
1048 continuation_ws='\t')
1049 eq(h.encode(), """\
1050from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
1051 helo=cthulhu.gerg.ca)
1052\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1053\tid 17k4h5-00034i-00
1054\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")
1055
1056 def test_long_8bit_header(self):
1057 eq = self.ndiffAssertEqual
1058 msg = Message()
1059 h = Header('Britische Regierung gibt', 'iso-8859-1',
1060 header_name='Subject')
1061 h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
Guido van Rossum9604e662007-08-30 03:46:43 +00001062 eq(h.encode(maxlinelen=76), """\
1063=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1064 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001065 msg['Subject'] = h
Guido van Rossum9604e662007-08-30 03:46:43 +00001066 eq(msg.as_string(maxheaderlen=76), """\
1067Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1068 =?iso-8859-1?q?hore-Windkraftprojekte?=
1069
1070""")
1071 eq(msg.as_string(maxheaderlen=0), """\
1072Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001073
1074""")
1075
1076 def test_long_8bit_header_no_charset(self):
1077 eq = self.ndiffAssertEqual
1078 msg = Message()
Barry Warsaw8c571042007-08-30 19:17:18 +00001079 header_string = ('Britische Regierung gibt gr\xfcnes Licht '
1080 'f\xfcr Offshore-Windkraftprojekte '
1081 '<a-very-long-address@example.com>')
1082 msg['Reply-To'] = header_string
R David Murray7441a7a2012-03-14 02:59:51 -04001083 eq(msg.as_string(maxheaderlen=78), """\
1084Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1085 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
1086
1087""")
Barry Warsaw8c571042007-08-30 19:17:18 +00001088 msg = Message()
R David Murray7441a7a2012-03-14 02:59:51 -04001089 msg['Reply-To'] = Header(header_string,
Barry Warsaw8c571042007-08-30 19:17:18 +00001090 header_name='Reply-To')
1091 eq(msg.as_string(maxheaderlen=78), """\
1092Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1093 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001094
1095""")
1096
1097 def test_long_to_header(self):
1098 eq = self.ndiffAssertEqual
1099 to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
R David Murray01581ee2011-04-18 10:04:34 -04001100 '<someone@eecs.umich.edu>, '
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001101 '"Someone Test #B" <someone@umich.edu>, '
1102 '"Someone Test #C" <someone@eecs.umich.edu>, '
1103 '"Someone Test #D" <someone@eecs.umich.edu>')
1104 msg = Message()
1105 msg['To'] = to
1106 eq(msg.as_string(maxheaderlen=78), '''\
Guido van Rossum9604e662007-08-30 03:46:43 +00001107To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
Barry Warsaw70d61ce2009-03-30 23:12:30 +00001108 "Someone Test #B" <someone@umich.edu>,
Guido van Rossum9604e662007-08-30 03:46:43 +00001109 "Someone Test #C" <someone@eecs.umich.edu>,
1110 "Someone Test #D" <someone@eecs.umich.edu>
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001111
1112''')
1113
1114 def test_long_line_after_append(self):
1115 eq = self.ndiffAssertEqual
1116 s = 'This is an example of string which has almost the limit of header length.'
1117 h = Header(s)
1118 h.append('Add another line.')
Guido van Rossum9604e662007-08-30 03:46:43 +00001119 eq(h.encode(maxlinelen=76), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001120This is an example of string which has almost the limit of header length.
1121 Add another line.""")
1122
1123 def test_shorter_line_with_append(self):
1124 eq = self.ndiffAssertEqual
1125 s = 'This is a shorter line.'
1126 h = Header(s)
1127 h.append('Add another sentence. (Surprise?)')
1128 eq(h.encode(),
1129 'This is a shorter line. Add another sentence. (Surprise?)')
1130
1131 def test_long_field_name(self):
1132 eq = self.ndiffAssertEqual
1133 fn = 'X-Very-Very-Very-Long-Header-Name'
Guido van Rossum9604e662007-08-30 03:46:43 +00001134 gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
1135 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
1136 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
1137 'bef\xf6rdert. ')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001138 h = Header(gs, 'iso-8859-1', header_name=fn)
1139 # BAW: this seems broken because the first line is too long
Guido van Rossum9604e662007-08-30 03:46:43 +00001140 eq(h.encode(maxlinelen=76), """\
1141=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
1142 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
1143 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
1144 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001145
1146 def test_long_received_header(self):
1147 h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
1148 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
1149 'Wed, 05 Mar 2003 18:10:18 -0700')
1150 msg = Message()
1151 msg['Received-1'] = Header(h, continuation_ws='\t')
1152 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +00001153 # This should be splitting on spaces not semicolons.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001154 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001155Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1156 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
Barry Warsawbef9d212007-08-31 10:55:37 +00001157 Wed, 05 Mar 2003 18:10:18 -0700
R David Murray01581ee2011-04-18 10:04:34 -04001158Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1159 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
Barry Warsawbef9d212007-08-31 10:55:37 +00001160 Wed, 05 Mar 2003 18:10:18 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001161
1162""")
1163
1164 def test_string_headerinst_eq(self):
1165 h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
1166 'tu-muenchen.de> (David Bremner\'s message of '
1167 '"Thu, 6 Mar 2003 13:58:21 +0100")')
1168 msg = Message()
1169 msg['Received-1'] = Header(h, header_name='Received-1',
1170 continuation_ws='\t')
1171 msg['Received-2'] = h
R David Murray01581ee2011-04-18 10:04:34 -04001172 # XXX The space after the ':' should not be there.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001173 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001174Received-1:\x20
1175 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1176 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
1177Received-2:\x20
1178 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1179 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001180
1181""")
1182
1183 def test_long_unbreakable_lines_with_continuation(self):
1184 eq = self.ndiffAssertEqual
1185 msg = Message()
1186 t = """\
1187iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1188 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
1189 msg['Face-1'] = t
1190 msg['Face-2'] = Header(t, header_name='Face-2')
R David Murray01581ee2011-04-18 10:04:34 -04001191 msg['Face-3'] = ' ' + t
Barry Warsawbef9d212007-08-31 10:55:37 +00001192 # XXX This splitting is all wrong. It the first value line should be
R David Murray01581ee2011-04-18 10:04:34 -04001193 # snug against the field name or the space after the header not there.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001194 eq(msg.as_string(maxheaderlen=78), """\
Barry Warsawc5a6a302007-08-31 11:19:21 +00001195Face-1:\x20
Barry Warsaw70d61ce2009-03-30 23:12:30 +00001196 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001197 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Barry Warsawc5a6a302007-08-31 11:19:21 +00001198Face-2:\x20
Barry Warsawbef9d212007-08-31 10:55:37 +00001199 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001200 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
R David Murray01581ee2011-04-18 10:04:34 -04001201Face-3:\x20
1202 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1203 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001204
1205""")
1206
1207 def test_another_long_multiline_header(self):
1208 eq = self.ndiffAssertEqual
1209 m = ('Received: from siimage.com '
1210 '([172.25.1.3]) by zima.siliconimage.com with '
Guido van Rossum9604e662007-08-30 03:46:43 +00001211 'Microsoft SMTPSVC(5.0.2195.4905); '
1212 'Wed, 16 Oct 2002 07:41:11 -0700')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001213 msg = email.message_from_string(m)
1214 eq(msg.as_string(maxheaderlen=78), '''\
R David Murray01581ee2011-04-18 10:04:34 -04001215Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with
1216 Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 2002 07:41:11 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001217
1218''')
1219
1220 def test_long_lines_with_different_header(self):
1221 eq = self.ndiffAssertEqual
1222 h = ('List-Unsubscribe: '
1223 '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
1224 ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
1225 '?subject=unsubscribe>')
1226 msg = Message()
1227 msg['List'] = h
1228 msg['List'] = Header(h, header_name='List')
1229 eq(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001230List: List-Unsubscribe:
1231 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +00001232 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
R David Murray01581ee2011-04-18 10:04:34 -04001233List: List-Unsubscribe:
1234 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +00001235 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001236
1237""")
1238
R. David Murray6f0022d2011-01-07 21:57:25 +00001239 def test_long_rfc2047_header_with_embedded_fws(self):
1240 h = Header(textwrap.dedent("""\
1241 We're going to pretend this header is in a non-ascii character set
1242 \tto see if line wrapping with encoded words and embedded
1243 folding white space works"""),
1244 charset='utf-8',
1245 header_name='Test')
1246 self.assertEqual(h.encode()+'\n', textwrap.dedent("""\
1247 =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
1248 =?utf-8?q?cter_set?=
1249 =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
1250 =?utf-8?q?_folding_white_space_works?=""")+'\n')
1251
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001252
Ezio Melottib3aedd42010-11-20 19:04:17 +00001253
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001254# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    # The fixture is a message whose body begins with the word "From ";
    # the two tests flatten it with mangle_from_ switched on and off.

    def setUp(self):
        body = """\
From the desk of A.A.A.:
Blah blah blah
"""
        self.msg = Message()
        self.msg['From'] = 'aaa@bbb.org'
        self.msg.set_payload(body)

    def test_mangled_from(self):
        # With mangling on, the body line is expected to gain a '>'.
        out = StringIO()
        Generator(out, mangle_from_=True).flatten(self.msg)
        expected = """\
From: aaa@bbb.org

>From the desk of A.A.A.:
Blah blah blah
"""
        self.assertEqual(out.getvalue(), expected)

    def test_dont_mangle_from(self):
        # With mangling off, the body is expected to be written unchanged.
        out = StringIO()
        Generator(out, mangle_from_=False).flatten(self.msg)
        expected = """\
From: aaa@bbb.org

From the desk of A.A.A.:
Blah blah blah
"""
        self.assertEqual(out.getvalue(), expected)
1285
1286
Ezio Melottib3aedd42010-11-20 19:04:17 +00001287
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001288# Test the basic MIMEAudio class
1289class TestMIMEAudio(unittest.TestCase):
1290 def setUp(self):
R David Murray28346b82011-03-31 11:40:20 -04001291 with openfile('audiotest.au', 'rb') as fp:
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001292 self._audiodata = fp.read()
1293 self._au = MIMEAudio(self._audiodata)
1294
1295 def test_guess_minor_type(self):
1296 self.assertEqual(self._au.get_content_type(), 'audio/basic')
1297
1298 def test_encoding(self):
1299 payload = self._au.get_payload()
R. David Murray7da8f062010-06-04 16:11:08 +00001300 self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
1301 self._audiodata)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001302
1303 def test_checkSetMinor(self):
1304 au = MIMEAudio(self._audiodata, 'fish')
1305 self.assertEqual(au.get_content_type(), 'audio/fish')
1306
1307 def test_add_header(self):
1308 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001309 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001310 self._au.add_header('Content-Disposition', 'attachment',
1311 filename='audiotest.au')
1312 eq(self._au['content-disposition'],
1313 'attachment; filename="audiotest.au"')
1314 eq(self._au.get_params(header='content-disposition'),
1315 [('attachment', ''), ('filename', 'audiotest.au')])
1316 eq(self._au.get_param('filename', header='content-disposition'),
1317 'audiotest.au')
1318 missing = []
1319 eq(self._au.get_param('attachment', header='content-disposition'), '')
1320 unless(self._au.get_param('foo', failobj=missing,
1321 header='content-disposition') is missing)
1322 # Try some missing stuff
1323 unless(self._au.get_param('foobar', missing) is missing)
1324 unless(self._au.get_param('attachment', missing,
1325 header='foobar') is missing)
1326
1327
Ezio Melottib3aedd42010-11-20 19:04:17 +00001328
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001329# Test the basic MIMEImage class
1330class TestMIMEImage(unittest.TestCase):
1331 def setUp(self):
1332 with openfile('PyBanner048.gif', 'rb') as fp:
1333 self._imgdata = fp.read()
1334 self._im = MIMEImage(self._imgdata)
1335
1336 def test_guess_minor_type(self):
1337 self.assertEqual(self._im.get_content_type(), 'image/gif')
1338
1339 def test_encoding(self):
1340 payload = self._im.get_payload()
R. David Murray7da8f062010-06-04 16:11:08 +00001341 self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
1342 self._imgdata)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001343
1344 def test_checkSetMinor(self):
1345 im = MIMEImage(self._imgdata, 'fish')
1346 self.assertEqual(im.get_content_type(), 'image/fish')
1347
1348 def test_add_header(self):
1349 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001350 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001351 self._im.add_header('Content-Disposition', 'attachment',
1352 filename='dingusfish.gif')
1353 eq(self._im['content-disposition'],
1354 'attachment; filename="dingusfish.gif"')
1355 eq(self._im.get_params(header='content-disposition'),
1356 [('attachment', ''), ('filename', 'dingusfish.gif')])
1357 eq(self._im.get_param('filename', header='content-disposition'),
1358 'dingusfish.gif')
1359 missing = []
1360 eq(self._im.get_param('attachment', header='content-disposition'), '')
1361 unless(self._im.get_param('foo', failobj=missing,
1362 header='content-disposition') is missing)
1363 # Try some missing stuff
1364 unless(self._im.get_param('foobar', missing) is missing)
1365 unless(self._im.get_param('attachment', missing,
1366 header='foobar') is missing)
1367
1368
Ezio Melottib3aedd42010-11-20 19:04:17 +00001369
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001370# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    # MIMEApplication built from six raw high-bit bytes.

    def test_headers(self):
        # Default content type and transfer encoding.
        part = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        self.assertEqual(part.get_content_type(), 'application/octet-stream')
        self.assertEqual(part['content-transfer-encoding'], 'base64')

    def test_body(self):
        # The stored payload is the base64 text; decoding it returns the
        # original bytes.
        raw = b'\xfa\xfb\xfc\xfd\xfe\xff'
        part = MIMEApplication(raw)
        # whitespace in the cte encoded block is RFC-irrelevant.
        encoded = part.get_payload().strip()
        self.assertEqual(encoded, '+vv8/f7/')
        self.assertEqual(part.get_payload(decode=True), raw)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001385
1386
Ezio Melottib3aedd42010-11-20 19:04:17 +00001387
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001388# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    # Checks MIMEText: default and explicit charsets, the Content-Type
    # header they produce, and payload round-tripping.
    #
    # Identity, membership and falsity checks use assertIs / assertIn /
    # assertFalse so that a failure reports the values involved, not just
    # "False is not true".

    def setUp(self):
        self._msg = MIMEText('hello there')

    def test_types(self):
        eq = self.assertEqual
        eq(self._msg.get_content_type(), 'text/plain')
        eq(self._msg.get_param('charset'), 'us-ascii')
        # A fresh list is a sentinel that cannot be confused with any
        # real parameter value.
        missing = []
        self.assertIs(self._msg.get_param('foobar', missing), missing)
        self.assertIs(
            self._msg.get_param('charset', missing, header='foobar'),
            missing)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there')
        # Compares the object returned by get_charset() directly with the
        # charset name.
        eq(msg.get_charset(), 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        self.assertIn('hello there', msg.as_string())

    def test_utf8_input(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        eq = self.assertEqual
        msg = MIMEText(teststr, _charset='utf-8')
        eq(msg.get_charset().output_charset, 'utf-8')
        eq(msg['content-type'], 'text/plain; charset="utf-8"')
        eq(msg.get_payload(decode=True), teststr.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
                   "will fix in email6")
    def test_utf8_input_no_charset(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
1439
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001440
Ezio Melottib3aedd42010-11-20 19:04:17 +00001441
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001442# Test complicated multipart/* messages
class TestMultipart(TestEmailBase):
    # Builds, flattens and parses multipart/* messages, including several
    # deliberately malformed ones stored in the msg_NN.txt data files.

    def setUp(self):
        with openfile('PyBanner048.gif', 'rb') as fp:
            data = fp.read()
        container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
        image = MIMEImage(data, name='dingusfish.gif')
        image.add_header('content-disposition', 'attachment',
                         filename='dingusfish.gif')
        intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
        container.attach(intro)
        container.attach(image)
        container['From'] = 'Barry <barry@digicool.com>'
        container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
        container['Subject'] = 'Here is your dingus fish'

        now = 987809702.54848599
        timetuple = time.localtime(now)
        if timetuple.tm_isdst == 0:
            tzsecs = time.timezone
        else:
            tzsecs = time.altzone
        # time.timezone/time.altzone count seconds *west* of UTC, so a
        # positive value maps to a negative numeric zone and vice versa.
        sign = '-' if tzsecs > 0 else '+'
        # Format the magnitude as HHMM.  Working from the absolute value
        # avoids emitting a second sign for zones east of UTC, and divmod
        # keeps zones that are not a whole number of hours correct.
        hours, minutes = divmod(abs(tzsecs) // 60, 60)
        tzoffset = ' %s%02d%02d' % (sign, hours, minutes)
        container['Date'] = time.strftime(
            '%a, %d %b %Y %H:%M:%S', timetuple) + tzoffset
        self._msg = container
        self._im = image
        self._txt = intro

    def _new_outer(self):
        # Shared starting point for the flattening tests below: an empty
        # multipart/mixed container carrying the three stock headers.
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        return outer

    def test_hierarchy(self):
        eq = self.assertEqual
        m = self._msg
        self.assertTrue(m.is_multipart())
        eq(m.get_content_type(), 'multipart/mixed')
        eq(len(m.get_payload()), 2)
        self.assertRaises(IndexError, m.get_payload, 2)
        m0 = m.get_payload(0)
        m1 = m.get_payload(1)
        # The payload holds the very objects that were attached in setUp.
        self.assertIs(m0, self._txt)
        self.assertIs(m1, self._im)
        eq(m.get_payload(), [m0, m1])
        self.assertFalse(m0.is_multipart())
        self.assertFalse(m1.is_multipart())

    def test_empty_multipart_idempotent(self):
        text = """\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY


--BOUNDARY--
"""
        msg = Parser().parsestr(text)
        self.ndiffAssertEqual(text, msg.as_string())

    def test_no_parts_in_a_multipart_with_none_epilogue(self):
        outer = self._new_outer()
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY

--BOUNDARY--''')

    def test_no_parts_in_a_multipart_with_empty_epilogue(self):
        outer = self._new_outer()
        outer.preamble = ''
        outer.epilogue = ''
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY

--BOUNDARY--
''')

    def test_one_part_in_a_multipart(self):
        eq = self.ndiffAssertEqual
        outer = self._new_outer()
        outer.set_boundary('BOUNDARY')
        msg = MIMEText('hello world')
        outer.attach(msg)
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_preamble(self):
        eq = self.ndiffAssertEqual
        outer = self._new_outer()
        outer.preamble = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_preamble(self):
        eq = self.ndiffAssertEqual
        outer = self._new_outer()
        outer.preamble = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = self._new_outer()
        outer.epilogue = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = self._new_outer()
        outer.epilogue = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = self._new_outer()
        outer.epilogue = '\n'
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--

''')

    def test_message_external_body(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_36.txt')
        eq(len(msg.get_payload()), 2)
        msg1 = msg.get_payload(1)
        eq(msg1.get_content_type(), 'multipart/alternative')
        eq(len(msg1.get_payload()), 2)
        # Each alternative wraps exactly one text/plain sub-message.
        for subpart in msg1.get_payload():
            eq(subpart.get_content_type(), 'message/external-body')
            eq(len(subpart.get_payload()), 1)
            subsubpart = subpart.get_payload(0)
            eq(subsubpart.get_content_type(), 'text/plain')

    def test_double_boundary(self):
        # msg_37.txt is a multipart that contains two dash-boundary's in a
        # row.  Our interpretation of RFC 2046 calls for ignoring the second
        # and subsequent boundaries.
        msg = self._msgobj('msg_37.txt')
        self.assertEqual(len(msg.get_payload()), 3)

    def test_nested_inner_contains_outer_boundary(self):
        eq = self.ndiffAssertEqual
        # msg_38.txt has an inner part that contains outer boundaries.  My
        # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
        # these are illegal and should be interpreted as unterminated inner
        # parts.
        msg = self._msgobj('msg_38.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): the nesting indentation below was reconstructed
        # (four spaces per level); confirm against a real test run.
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
            text/plain
        text/plain
    text/plain
    text/plain
""")

    def test_nested_with_same_boundary(self):
        eq = self.ndiffAssertEqual
        # msg 39.txt is similarly evil in that it's got inner parts that use
        # the same boundary as outer parts.  Again, I believe the way this is
        # parsed is closest to the spirit of RFC 2046
        msg = self._msgobj('msg_39.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): the nesting indentation below was reconstructed
        # (four spaces per level); confirm against a real test run.
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
        application/octet-stream
        application/octet-stream
    text/plain
""")

    def test_boundary_in_non_multipart(self):
        # A boundary parameter on a non-multipart type must not trigger
        # multipart parsing; the message has to round-trip unchanged.
        msg = self._msgobj('msg_40.txt')
        self.assertEqual(msg.as_string(), '''\
MIME-Version: 1.0
Content-Type: text/html; boundary="--961284236552522269"

----961284236552522269
Content-Type: text/html;
Content-Transfer-Encoding: 7Bit

<html></html>

----961284236552522269--
''')

    def test_boundary_with_leading_space(self):
        eq = self.assertEqual
        msg = email.message_from_string('''\
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary=" XXXX"

-- XXXX
Content-Type: text/plain


-- XXXX
Content-Type: text/plain

-- XXXX--
''')
        self.assertTrue(msg.is_multipart())
        eq(msg.get_boundary(), ' XXXX')
        eq(len(msg.get_payload()), 2)

    def test_boundary_without_trailing_newline(self):
        # The closing boundary is the last thing in the text, with no
        # newline after it; the part's payload must still be intact.
        m = Parser().parsestr("""\
Content-Type: multipart/mixed; boundary="===============0012394164=="
MIME-Version: 1.0

--===============0012394164==
Content-Type: image/file1.jpg
MIME-Version: 1.0
Content-Transfer-Encoding: base64

YXNkZg==
--===============0012394164==--""")
        self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001814
1815
Ezio Melottib3aedd42010-11-20 19:04:17 +00001816
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001817# Test some badly formatted messages
class TestNonConformant(TestEmailBase):
    # Feeds the parser badly formatted messages and checks both the
    # resulting message objects and the defects recorded on them.

    def test_parse_missing_minor_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_14.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')

    # test_defect_handling
    def test_same_boundary_inner_outer(self):
        msg = self._msgobj('msg_15.txt')
        # XXX We can probably eventually do better
        inner = msg.get_payload(0)
        self.assertTrue(hasattr(inner, 'defects'))
        self.assertEqual(len(inner.defects), 1)
        self.assertIsInstance(inner.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    # test_defect_handling
    def test_multipart_no_boundary(self):
        msg = self._msgobj('msg_25.txt')
        # Without a boundary the body cannot be split, so it stays a string.
        self.assertIsInstance(msg.get_payload(), str)
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    # Template shared by the three CTE tests below.  The {} placeholder sits
    # at the end of the Content-Type header so a test can splice in an extra
    # header line (or nothing at all) via str.format.
    # NOTE(review): the indentation of the folded "boundary=" line was
    # reconstructed; it only needs to be deeper than the surrounding lines.
    multipart_msg = textwrap.dedent("""\
        Date: Wed, 14 Nov 2007 12:56:23 GMT
        From: foo@bar.invalid
        To: foo@bar.invalid
        Subject: Content-Transfer-Encoding: base64 and multipart
        MIME-Version: 1.0
        Content-Type: multipart/mixed;
            boundary="===============3344438784458119861=="{}

        --===============3344438784458119861==
        Content-Type: text/plain

        Test message

        --===============3344438784458119861==
        Content-Type: application/octet-stream
        Content-Transfer-Encoding: base64

        YWJj

        --===============3344438784458119861==--
        """)

    # test_defect_handling
    def test_multipart_invalid_cte(self):
        msg = self._str_msg(
            self.multipart_msg.format("\nContent-Transfer-Encoding: base64"))
        self.assertEqual(len(msg.defects), 1)
        self.assertIsInstance(
            msg.defects[0],
            errors.InvalidMultipartContentTransferEncodingDefect)

    # test_defect_handling
    def test_multipart_no_cte_no_defect(self):
        msg = self._str_msg(self.multipart_msg.format(''))
        self.assertEqual(len(msg.defects), 0)

    # test_defect_handling
    def test_multipart_valid_cte_no_defect(self):
        # 'BINary' is spelled in mixed case on purpose.
        for cte in ('7bit', '8bit', 'BINary'):
            msg = self._str_msg(
                self.multipart_msg.format(
                    "\nContent-Transfer-Encoding: {}".format(cte)))
            self.assertEqual(len(msg.defects), 0)

    # test_headerregistry.TestContentTypeHeader invalid_1 and invalid_2.
    def test_invalid_content_type(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        msg = Message()
        # RFC 2045, $5.2 says invalid yields text/plain
        msg['Content-Type'] = 'text'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Clear the old value and try something /really/ invalid
        del msg['content-type']
        msg['Content-Type'] = 'foo'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Still, make sure that the message is idempotently generated
        s = StringIO()
        g = Generator(s)
        g.flatten(msg)
        neq(s.getvalue(), 'Content-Type: foo\n\n')

    def test_no_start_boundary(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_31.txt')
        # The whole body, boundary lines included, is kept as one string.
        eq(msg.get_payload(), """\
--BOUNDARY
Content-Type: text/plain

message 1

--BOUNDARY
Content-Type: text/plain

message 2

--BOUNDARY--
""")

    def test_no_separating_blank_line(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_35.txt')
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: here's something interesting

counter to RFC 2822, there's no separating newline here
""")

    # test_defect_handling
    def test_lying_multipart(self):
        msg = self._msgobj('msg_41.txt')
        self.assertTrue(hasattr(msg, 'defects'))
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    # test_defect_handling
    def test_missing_start_boundary(self):
        outer = self._msgobj('msg_42.txt')
        # The message structure is:
        #
        # multipart/mixed
        #    text/plain
        #    message/rfc822
        #        multipart/mixed [*]
        #
        # [*] This message is missing its start boundary
        bad = outer.get_payload(1).get_payload(0)
        self.assertEqual(len(bad.defects), 1)
        self.assertIsInstance(bad.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    # test_defect_handling
    def test_first_line_is_continuation_header(self):
        eq = self.assertEqual
        m = ' Line 1\nSubject: test\n\nbody'
        msg = email.message_from_string(m)
        eq(msg.keys(), ['Subject'])
        eq(msg.get_payload(), 'body')
        eq(len(msg.defects), 1)
        self.assertDefectsEqual(msg.defects,
                                [errors.FirstHeaderLineIsContinuationDefect])
        # The defect carries the offending line itself.
        eq(msg.defects[0].line, ' Line 1\n')

    # test_defect_handling
    def test_missing_header_body_separator(self):
        # Our heuristic if we see a line that doesn't look like a header (no
        # leading whitespace but no ':') is to assume that the blank line that
        # separates the header from the body is missing, and to stop parsing
        # headers and start parsing the body.
        msg = self._str_msg('Subject: test\nnot a header\nTo: abc\n\nb\n')
        self.assertEqual(msg.keys(), ['Subject'])
        self.assertEqual(msg.get_payload(), 'not a header\nTo: abc\n\nb\n')
        self.assertDefectsEqual(msg.defects,
                                [errors.MissingHeaderBodySeparatorDefect])
1993
Ezio Melottib3aedd42010-11-20 19:04:17 +00001994
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001995# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    # Exercises decode_header()/make_header() on RFC 2047 encoded words,
    # including the worked examples listed at the end of that RFC.

    def test_rfc2047_multiline(self):
        eq = self.assertEqual
        s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
        dh = decode_header(s)
        eq(dh, [
            (b'Re: ', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            (b' baz foo bar ', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
        header = make_header(dh)
        eq(str(header),
           'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        # Re-encoding at 76 columns splits the final encoded word in two.
        self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""")

    def test_whitespace_keeper_unicode(self):
        eq = self.assertEqual
        s = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        dh = decode_header(s)
        eq(dh, [(b'Andr\xe9', 'iso-8859-1'),
                (b' Pirard <pirard@dom.ain>', None)])
        header = str(make_header(dh))
        eq(header, 'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_keeper_unicode_2(self):
        eq = self.assertEqual
        s = ('The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the '
             '=?iso-8859-1?b?bGF6eSBkb2c=?=')
        dh = decode_header(s)
        eq(dh, [(b'The ', None), (b'quick brown fox', 'iso-8859-1'),
                (b' jumped over the ', None), (b'lazy dog', 'iso-8859-1')])
        hu = str(make_header(dh))
        eq(hu, 'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        dh = decode_header(s)
        self.assertEqual(dh, [(b'Sm', None), (b'\xf6', 'iso-8859-1'),
                              (b'rg', None), (b'\xe5', 'iso-8859-1'),
                              (b'sbord', None)])

    def test_rfc2047_with_whitespace(self):
        s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        dh = decode_header(s)
        self.assertEqual(dh, [(b'Sm ', None), (b'\xf6', 'iso-8859-1'),
                              (b' rg ', None), (b'\xe5', 'iso-8859-1'),
                              (b' sbord', None)])

    def test_rfc2047_B_bad_padding(self):
        s = '=?iso-8859-1?B?%s?='
        data = [                                # only test complete bytes
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi')
          ]
        for q, a in data:
            dh = decode_header(s % q)
            self.assertEqual(dh, [(a, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004.
        s = '=?iso-8659-1?Q?andr=e9=zz?='
        self.assertEqual(decode_header(s),
                         [(b'andr\xe9=zz', 'iso-8659-1')])

    def test_rfc2047_rfc2047_1(self):
        # 1st testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_2(self):
        # 2nd testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?= b)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a', 'iso-8859-1'), (b' b)', None)])

    def test_rfc2047_rfc2047_3(self):
        # 3rd testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_4(self):
        # 4th testcase at end of rfc2047.  It differs from the 3rd only in
        # having TWO spaces between the encoded words; all of that white
        # space must still be dropped when the adjacent words are joined.
        s = '(=?ISO-8859-1?Q?a?=  =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_5a(self):
        # 5th testcase at end of rfc2047 newline is \r\n
        s = '(=?ISO-8859-1?Q?a?=\r\n =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_5b(self):
        # 5th testcase at end of rfc2047 newline is \n
        s = '(=?ISO-8859-1?Q?a?=\n =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_6(self):
        # 6th testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a_b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a b', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_7(self):
        # 7th testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a', 'iso-8859-1'), (b' b', 'iso-8859-2'),
             (b')', None)])
        self.assertEqual(make_header(decode_header(s)).encode(), s.lower())
        self.assertEqual(str(make_header(decode_header(s))), '(a b)')

    def test_multiline_header(self):
        s = '=?windows-1252?q?=22M=FCller_T=22?=\r\n <T.Mueller@xxx.com>'
        self.assertEqual(decode_header(s),
            [(b'"M\xfcller T"', 'windows-1252'),
             (b'<T.Mueller@xxx.com>', None)])
        # Re-encoding gives the same text with the line break removed.
        self.assertEqual(make_header(decode_header(s)).encode(),
                         ''.join(s.splitlines()))
        self.assertEqual(str(make_header(decode_header(s))),
                         '"Müller T" <T.Mueller@xxx.com>')
2122
Ezio Melottib3aedd42010-11-20 19:04:17 +00002123
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002124# Test the MIMEMessage class
2125class TestMIMEMessage(TestEmailBase):
2126 def setUp(self):
2127 with openfile('msg_11.txt') as fp:
2128 self._text = fp.read()
2129
2130 def test_type_error(self):
2131 self.assertRaises(TypeError, MIMEMessage, 'a plain string')
2132
2133 def test_valid_argument(self):
2134 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002135 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002136 subject = 'A sub-message'
2137 m = Message()
2138 m['Subject'] = subject
2139 r = MIMEMessage(m)
2140 eq(r.get_content_type(), 'message/rfc822')
2141 payload = r.get_payload()
2142 unless(isinstance(payload, list))
2143 eq(len(payload), 1)
2144 subpart = payload[0]
2145 unless(subpart is m)
2146 eq(subpart['subject'], subject)
2147
2148 def test_bad_multipart(self):
2149 eq = self.assertEqual
2150 msg1 = Message()
2151 msg1['Subject'] = 'subpart 1'
2152 msg2 = Message()
2153 msg2['Subject'] = 'subpart 2'
2154 r = MIMEMessage(msg1)
2155 self.assertRaises(errors.MultipartConversionError, r.attach, msg2)
2156
2157 def test_generate(self):
2158 # First craft the message to be encapsulated
2159 m = Message()
2160 m['Subject'] = 'An enclosed message'
2161 m.set_payload('Here is the body of the message.\n')
2162 r = MIMEMessage(m)
2163 r['Subject'] = 'The enclosing message'
2164 s = StringIO()
2165 g = Generator(s)
2166 g.flatten(r)
2167 self.assertEqual(s.getvalue(), """\
2168Content-Type: message/rfc822
2169MIME-Version: 1.0
2170Subject: The enclosing message
2171
2172Subject: An enclosed message
2173
2174Here is the body of the message.
2175""")
2176
2177 def test_parse_message_rfc822(self):
2178 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002179 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002180 msg = self._msgobj('msg_11.txt')
2181 eq(msg.get_content_type(), 'message/rfc822')
2182 payload = msg.get_payload()
2183 unless(isinstance(payload, list))
2184 eq(len(payload), 1)
2185 submsg = payload[0]
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002186 self.assertTrue(isinstance(submsg, Message))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002187 eq(submsg['subject'], 'An enclosed message')
2188 eq(submsg.get_payload(), 'Here is the body of the message.\n')
2189
2190 def test_dsn(self):
2191 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002192 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002193 # msg 16 is a Delivery Status Notification, see RFC 1894
2194 msg = self._msgobj('msg_16.txt')
2195 eq(msg.get_content_type(), 'multipart/report')
2196 unless(msg.is_multipart())
2197 eq(len(msg.get_payload()), 3)
2198 # Subpart 1 is a text/plain, human readable section
2199 subpart = msg.get_payload(0)
2200 eq(subpart.get_content_type(), 'text/plain')
2201 eq(subpart.get_payload(), """\
2202This report relates to a message you sent with the following header fields:
2203
2204 Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
2205 Date: Sun, 23 Sep 2001 20:10:55 -0700
2206 From: "Ian T. Henry" <henryi@oxy.edu>
2207 To: SoCal Raves <scr@socal-raves.org>
2208 Subject: [scr] yeah for Ians!!
2209
2210Your message cannot be delivered to the following recipients:
2211
2212 Recipient address: jangel1@cougar.noc.ucla.edu
2213 Reason: recipient reached disk quota
2214
2215""")
2216 # Subpart 2 contains the machine parsable DSN information. It
2217 # consists of two blocks of headers, represented by two nested Message
2218 # objects.
2219 subpart = msg.get_payload(1)
2220 eq(subpart.get_content_type(), 'message/delivery-status')
2221 eq(len(subpart.get_payload()), 2)
2222 # message/delivery-status should treat each block as a bunch of
2223 # headers, i.e. a bunch of Message objects.
2224 dsn1 = subpart.get_payload(0)
2225 unless(isinstance(dsn1, Message))
2226 eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
2227 eq(dsn1.get_param('dns', header='reporting-mta'), '')
2228 # Try a missing one <wink>
2229 eq(dsn1.get_param('nsd', header='reporting-mta'), None)
2230 dsn2 = subpart.get_payload(1)
2231 unless(isinstance(dsn2, Message))
2232 eq(dsn2['action'], 'failed')
2233 eq(dsn2.get_params(header='original-recipient'),
2234 [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
2235 eq(dsn2.get_param('rfc822', header='final-recipient'), '')
2236 # Subpart 3 is the original message
2237 subpart = msg.get_payload(2)
2238 eq(subpart.get_content_type(), 'message/rfc822')
2239 payload = subpart.get_payload()
2240 unless(isinstance(payload, list))
2241 eq(len(payload), 1)
2242 subsubpart = payload[0]
2243 unless(isinstance(subsubpart, Message))
2244 eq(subsubpart.get_content_type(), 'text/plain')
2245 eq(subsubpart['message-id'],
2246 '<002001c144a6$8752e060$56104586@oxy.edu>')
2247
def test_epilogue(self):
    # A hand-built multipart/mixed message carrying both a preamble and an
    # epilogue must flatten to exactly the text stored in msg_21.txt.
    with openfile('msg_21.txt') as fixture:
        expected = fixture.read()
    outer = Message()
    for header, value in (('From', 'aperson@dom.ain'),
                          ('To', 'bperson@dom.ain'),
                          ('Subject', 'Test')):
        outer[header] = value
    outer.preamble = 'MIME message'
    outer.epilogue = 'End of MIME message\n'
    parts = [MIMEText('One'), MIMEText('Two')]
    outer.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
    for part in parts:
        outer.attach(part)
    buf = StringIO()
    Generator(buf).flatten(outer)
    self.ndiffAssertEqual(buf.getvalue(), expected)
2267
2268 def test_no_nl_preamble(self):
2269 eq = self.ndiffAssertEqual
2270 msg = Message()
2271 msg['From'] = 'aperson@dom.ain'
2272 msg['To'] = 'bperson@dom.ain'
2273 msg['Subject'] = 'Test'
2274 msg.preamble = 'MIME message'
2275 msg.epilogue = ''
2276 msg1 = MIMEText('One')
2277 msg2 = MIMEText('Two')
2278 msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
2279 msg.attach(msg1)
2280 msg.attach(msg2)
2281 eq(msg.as_string(), """\
2282From: aperson@dom.ain
2283To: bperson@dom.ain
2284Subject: Test
2285Content-Type: multipart/mixed; boundary="BOUNDARY"
2286
2287MIME message
2288--BOUNDARY
2289Content-Type: text/plain; charset="us-ascii"
2290MIME-Version: 1.0
2291Content-Transfer-Encoding: 7bit
2292
2293One
2294--BOUNDARY
2295Content-Type: text/plain; charset="us-ascii"
2296MIME-Version: 1.0
2297Content-Transfer-Encoding: 7bit
2298
2299Two
2300--BOUNDARY--
2301""")
2302
def test_default_type(self):
    # Both parts of the message parsed from msg_30.txt report
    # message/rfc822 as default and content type, and the message wrapped
    # inside each part reports text/plain.
    with openfile('msg_30.txt') as fp:
        outer = email.message_from_file(fp)
    containers = []
    for index in (0, 1):
        container = outer.get_payload(index)
        self.assertEqual(container.get_default_type(), 'message/rfc822')
        self.assertEqual(container.get_content_type(), 'message/rfc822')
        containers.append(container)
    for container in containers:
        inner = container.get_payload(0)
        self.assertEqual(inner.get_default_type(), 'text/plain')
        self.assertEqual(inner.get_content_type(), 'text/plain')
2319
def test_default_type_with_explicit_container_type(self):
    # Same expectations as test_default_type, checked against msg_28.txt:
    # each part is message/rfc822 and the message it wraps is text/plain.
    with openfile('msg_28.txt') as fp:
        outer = email.message_from_file(fp)
    containers = []
    for index in (0, 1):
        container = outer.get_payload(index)
        self.assertEqual(container.get_default_type(), 'message/rfc822')
        self.assertEqual(container.get_content_type(), 'message/rfc822')
        containers.append(container)
    for container in containers:
        inner = container.get_payload(0)
        self.assertEqual(inner.get_default_type(), 'text/plain')
        self.assertEqual(inner.get_content_type(), 'text/plain')
2336
2337 def test_default_type_non_parsed(self):
2338 eq = self.assertEqual
2339 neq = self.ndiffAssertEqual
2340 # Set up container
2341 container = MIMEMultipart('digest', 'BOUNDARY')
2342 container.epilogue = ''
2343 # Set up subparts
2344 subpart1a = MIMEText('message 1\n')
2345 subpart2a = MIMEText('message 2\n')
2346 subpart1 = MIMEMessage(subpart1a)
2347 subpart2 = MIMEMessage(subpart2a)
2348 container.attach(subpart1)
2349 container.attach(subpart2)
2350 eq(subpart1.get_content_type(), 'message/rfc822')
2351 eq(subpart1.get_default_type(), 'message/rfc822')
2352 eq(subpart2.get_content_type(), 'message/rfc822')
2353 eq(subpart2.get_default_type(), 'message/rfc822')
2354 neq(container.as_string(0), '''\
2355Content-Type: multipart/digest; boundary="BOUNDARY"
2356MIME-Version: 1.0
2357
2358--BOUNDARY
2359Content-Type: message/rfc822
2360MIME-Version: 1.0
2361
2362Content-Type: text/plain; charset="us-ascii"
2363MIME-Version: 1.0
2364Content-Transfer-Encoding: 7bit
2365
2366message 1
2367
2368--BOUNDARY
2369Content-Type: message/rfc822
2370MIME-Version: 1.0
2371
2372Content-Type: text/plain; charset="us-ascii"
2373MIME-Version: 1.0
2374Content-Transfer-Encoding: 7bit
2375
2376message 2
2377
2378--BOUNDARY--
2379''')
2380 del subpart1['content-type']
2381 del subpart1['mime-version']
2382 del subpart2['content-type']
2383 del subpart2['mime-version']
2384 eq(subpart1.get_content_type(), 'message/rfc822')
2385 eq(subpart1.get_default_type(), 'message/rfc822')
2386 eq(subpart2.get_content_type(), 'message/rfc822')
2387 eq(subpart2.get_default_type(), 'message/rfc822')
2388 neq(container.as_string(0), '''\
2389Content-Type: multipart/digest; boundary="BOUNDARY"
2390MIME-Version: 1.0
2391
2392--BOUNDARY
2393
2394Content-Type: text/plain; charset="us-ascii"
2395MIME-Version: 1.0
2396Content-Transfer-Encoding: 7bit
2397
2398message 1
2399
2400--BOUNDARY
2401
2402Content-Type: text/plain; charset="us-ascii"
2403MIME-Version: 1.0
2404Content-Transfer-Encoding: 7bit
2405
2406message 2
2407
2408--BOUNDARY--
2409''')
2410
2411 def test_mime_attachments_in_constructor(self):
2412 eq = self.assertEqual
2413 text1 = MIMEText('')
2414 text2 = MIMEText('')
2415 msg = MIMEMultipart(_subparts=(text1, text2))
2416 eq(len(msg.get_payload()), 2)
2417 eq(msg.get_payload(0), text1)
2418 eq(msg.get_payload(1), text2)
2419
Christian Heimes587c2bf2008-01-19 16:21:02 +00002420 def test_default_multipart_constructor(self):
2421 msg = MIMEMultipart()
2422 self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002423
Ezio Melottib3aedd42010-11-20 19:04:17 +00002424
# A general test of parser->model->generator idempotency.  IOW, read a message
# in, parse it into a message object tree, then without touching the tree,
# regenerate the plain text.  The original text and the transformed text
# should be identical.  Note that we ignore the Unix-From since that may
# contain a changed date.
class TestIdempotent(TestEmailBase):
    # Each test parses a data file into a message tree and checks that
    # regenerating the message yields the text it was parsed from.

    linesep = '\n'

    def _msgobj(self, filename):
        # Return (message, text): the parsed message together with the raw
        # text it was parsed from, so callers can compare regenerated output
        # against the original.
        with openfile(filename) as fp:
            data = fp.read()
        msg = email.message_from_string(data)
        return msg, data

    def _idempotent(self, msg, text, unixfrom=False):
        # Flatten msg and require the result to equal text.  maxheaderlen=0
        # keeps the generator from wrapping/continuing long headers.
        s = StringIO()
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg, unixfrom=unixfrom)
        self.ndiffAssertEqual(text, s.getvalue())

    def test_parse_text_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_01.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_params()[1], ('charset', 'us-ascii'))
        eq(msg.get_param('charset'), 'us-ascii')
        self.assertIsNone(msg.preamble)
        self.assertIsNone(msg.epilogue)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_03.txt')
        eq(msg.get_content_type(), 'text/plain')
        self.assertIsNone(msg.get_params())
        self.assertIsNone(msg.get_param('charset'))
        self._idempotent(msg, text)

    def test_simple_multipart(self):
        msg, text = self._msgobj('msg_04.txt')
        self._idempotent(msg, text)

    def test_MIME_digest(self):
        msg, text = self._msgobj('msg_02.txt')
        self._idempotent(msg, text)

    def test_long_header(self):
        msg, text = self._msgobj('msg_27.txt')
        self._idempotent(msg, text)

    def test_MIME_digest_with_part_headers(self):
        msg, text = self._msgobj('msg_28.txt')
        self._idempotent(msg, text)

    def test_mixed_with_image(self):
        msg, text = self._msgobj('msg_06.txt')
        self._idempotent(msg, text)

    def test_multipart_report(self):
        msg, text = self._msgobj('msg_05.txt')
        self._idempotent(msg, text)

    def test_dsn(self):
        msg, text = self._msgobj('msg_16.txt')
        self._idempotent(msg, text)

    def test_preamble_epilogue(self):
        msg, text = self._msgobj('msg_21.txt')
        self._idempotent(msg, text)

    def test_multipart_one_part(self):
        msg, text = self._msgobj('msg_23.txt')
        self._idempotent(msg, text)

    def test_multipart_no_parts(self):
        msg, text = self._msgobj('msg_24.txt')
        self._idempotent(msg, text)

    def test_no_start_boundary(self):
        msg, text = self._msgobj('msg_31.txt')
        self._idempotent(msg, text)

    def test_rfc2231_charset(self):
        msg, text = self._msgobj('msg_32.txt')
        self._idempotent(msg, text)

    def test_more_rfc2231_parameters(self):
        msg, text = self._msgobj('msg_33.txt')
        self._idempotent(msg, text)

    def test_text_plain_in_a_multipart_digest(self):
        msg, text = self._msgobj('msg_34.txt')
        self._idempotent(msg, text)

    def test_nested_multipart_mixeds(self):
        msg, text = self._msgobj('msg_12a.txt')
        self._idempotent(msg, text)

    def test_message_external_body_idempotent(self):
        msg, text = self._msgobj('msg_36.txt')
        self._idempotent(msg, text)

    def test_message_delivery_status(self):
        # This is the one round trip that also emits the Unix-From line.
        msg, text = self._msgobj('msg_43.txt')
        self._idempotent(msg, text, unixfrom=True)

    def test_message_signed_idempotent(self):
        msg, text = self._msgobj('msg_45.txt')
        self._idempotent(msg, text)

    def test_content_type(self):
        eq = self.assertEqual
        msg, _ = self._msgobj('msg_05.txt')
        eq(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(msg.get_params())
        eq(params['report-type'], 'delivery-status')
        eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep)
        eq(msg.epilogue, self.linesep)
        eq(len(msg.get_payload()), 3)
        # Make sure the subparts are what we expect
        msg1 = msg.get_payload(0)
        eq(msg1.get_content_type(), 'text/plain')
        eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg2 = msg.get_payload(1)
        eq(msg2.get_content_type(), 'text/plain')
        eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg3 = msg.get_payload(2)
        eq(msg3.get_content_type(), 'message/rfc822')
        self.assertIsInstance(msg3, Message)
        payload = msg3.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg4 = payload[0]
        self.assertIsInstance(msg4, Message)
        eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep)

    def test_parser(self):
        eq = self.assertEqual
        msg, _ = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        eq(msg.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and that
        # that submessage has a type of text/plain
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg1 = payload[0]
        self.assertIsInstance(msg1, Message)
        eq(msg1.get_content_type(), 'text/plain')
        self.assertIsInstance(msg1.get_payload(), str)
        eq(msg1.get_payload(), self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002587
2588
Ezio Melottib3aedd42010-11-20 19:04:17 +00002589
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002590# Test various other bits of the package's functionality
2591class TestMiscellaneous(TestEmailBase):
2592 def test_message_from_string(self):
2593 with openfile('msg_01.txt') as fp:
2594 text = fp.read()
2595 msg = email.message_from_string(text)
2596 s = StringIO()
2597 # Don't wrap/continue long headers since we're trying to test
2598 # idempotency.
2599 g = Generator(s, maxheaderlen=0)
2600 g.flatten(msg)
2601 self.assertEqual(text, s.getvalue())
2602
2603 def test_message_from_file(self):
2604 with openfile('msg_01.txt') as fp:
2605 text = fp.read()
2606 fp.seek(0)
2607 msg = email.message_from_file(fp)
2608 s = StringIO()
2609 # Don't wrap/continue long headers since we're trying to test
2610 # idempotency.
2611 g = Generator(s, maxheaderlen=0)
2612 g.flatten(msg)
2613 self.assertEqual(text, s.getvalue())
2614
2615 def test_message_from_string_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002616 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002617 with openfile('msg_01.txt') as fp:
2618 text = fp.read()
2619
2620 # Create a subclass
2621 class MyMessage(Message):
2622 pass
2623
2624 msg = email.message_from_string(text, MyMessage)
2625 unless(isinstance(msg, MyMessage))
2626 # Try something more complicated
2627 with openfile('msg_02.txt') as fp:
2628 text = fp.read()
2629 msg = email.message_from_string(text, MyMessage)
2630 for subpart in msg.walk():
2631 unless(isinstance(subpart, MyMessage))
2632
2633 def test_message_from_file_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002634 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002635 # Create a subclass
2636 class MyMessage(Message):
2637 pass
2638
2639 with openfile('msg_01.txt') as fp:
2640 msg = email.message_from_file(fp, MyMessage)
2641 unless(isinstance(msg, MyMessage))
2642 # Try something more complicated
2643 with openfile('msg_02.txt') as fp:
2644 msg = email.message_from_file(fp, MyMessage)
2645 for subpart in msg.walk():
2646 unless(isinstance(subpart, MyMessage))
2647
R David Murrayc27e5222012-05-25 15:01:48 -04002648 def test_custom_message_does_not_require_arguments(self):
2649 class MyMessage(Message):
2650 def __init__(self):
2651 super().__init__()
2652 msg = self._str_msg("Subject: test\n\ntest", MyMessage)
2653 self.assertTrue(isinstance(msg, MyMessage))
2654
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002655 def test__all__(self):
2656 module = __import__('email')
R David Murray1b6c7242012-03-16 22:43:05 -04002657 self.assertEqual(sorted(module.__all__), [
2658 'base64mime', 'charset', 'encoders', 'errors', 'feedparser',
2659 'generator', 'header', 'iterators', 'message',
2660 'message_from_binary_file', 'message_from_bytes',
2661 'message_from_file', 'message_from_string', 'mime', 'parser',
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002662 'quoprimime', 'utils',
2663 ])
2664
2665 def test_formatdate(self):
2666 now = time.time()
2667 self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
2668 time.gmtime(now)[:6])
2669
2670 def test_formatdate_localtime(self):
2671 now = time.time()
2672 self.assertEqual(
2673 utils.parsedate(utils.formatdate(now, localtime=True))[:6],
2674 time.localtime(now)[:6])
2675
2676 def test_formatdate_usegmt(self):
2677 now = time.time()
2678 self.assertEqual(
2679 utils.formatdate(now, localtime=False),
2680 time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
2681 self.assertEqual(
2682 utils.formatdate(now, localtime=False, usegmt=True),
2683 time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
2684
2685 def test_parsedate_none(self):
2686 self.assertEqual(utils.parsedate(''), None)
2687
2688 def test_parsedate_compact(self):
2689 # The FWS after the comma is optional
2690 self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
2691 utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
2692
2693 def test_parsedate_no_dayofweek(self):
2694 eq = self.assertEqual
2695 eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
2696 (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
2697
2698 def test_parsedate_compact_no_dayofweek(self):
2699 eq = self.assertEqual
2700 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
2701 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2702
R. David Murray4a62e892010-12-23 20:35:46 +00002703 def test_parsedate_no_space_before_positive_offset(self):
2704 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
2705 (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
2706
2707 def test_parsedate_no_space_before_negative_offset(self):
2708 # Issue 1155362: we already handled '+' for this case.
2709 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
2710 (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))
2711
2712
R David Murrayaccd1c02011-03-13 20:06:23 -04002713 def test_parsedate_accepts_time_with_dots(self):
2714 eq = self.assertEqual
2715 eq(utils.parsedate_tz('5 Feb 2003 13.47.26 -0800'),
2716 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2717 eq(utils.parsedate_tz('5 Feb 2003 13.47 -0800'),
2718 (2003, 2, 5, 13, 47, 0, 0, 1, -1, -28800))
2719
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002720 def test_parsedate_acceptable_to_time_functions(self):
2721 eq = self.assertEqual
2722 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
2723 t = int(time.mktime(timetup))
2724 eq(time.localtime(t)[:6], timetup[:6])
2725 eq(int(time.strftime('%Y', timetup)), 2003)
2726 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
2727 t = int(time.mktime(timetup[:9]))
2728 eq(time.localtime(t)[:6], timetup[:6])
2729 eq(int(time.strftime('%Y', timetup[:9])), 2003)
2730
Alexander Belopolskya07548e2012-06-21 20:34:09 -04002731 def test_mktime_tz(self):
2732 self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
2733 -1, -1, -1, 0)), 0)
2734 self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
2735 -1, -1, -1, 1234)), -1234)
2736
R. David Murray219d1c82010-08-25 00:45:55 +00002737 def test_parsedate_y2k(self):
2738 """Test for parsing a date with a two-digit year.
2739
2740 Parsing a date with a two-digit year should return the correct
2741 four-digit year. RFC822 allows two-digit years, but RFC2822 (which
2742 obsoletes RFC822) requires four-digit years.
2743
2744 """
2745 self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
2746 utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
2747 self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
2748 utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
2749
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002750 def test_parseaddr_empty(self):
2751 self.assertEqual(utils.parseaddr('<>'), ('', ''))
2752 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
2753
2754 def test_noquote_dump(self):
2755 self.assertEqual(
2756 utils.formataddr(('A Silly Person', 'person@dom.ain')),
2757 'A Silly Person <person@dom.ain>')
2758
2759 def test_escape_dump(self):
2760 self.assertEqual(
2761 utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
R David Murrayb53319f2012-03-14 15:31:47 -04002762 r'"A (Very) Silly Person" <person@dom.ain>')
2763 self.assertEqual(
2764 utils.parseaddr(r'"A \(Very\) Silly Person" <person@dom.ain>'),
2765 ('A (Very) Silly Person', 'person@dom.ain'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002766 a = r'A \(Special\) Person'
2767 b = 'person@dom.ain'
2768 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2769
2770 def test_escape_backslashes(self):
2771 self.assertEqual(
2772 utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
2773 r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
2774 a = r'Arthur \Backslash\ Foobar'
2775 b = 'person@dom.ain'
2776 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2777
R David Murray8debacb2011-04-06 09:35:57 -04002778 def test_quotes_unicode_names(self):
2779 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2780 name = "H\u00e4ns W\u00fcrst"
2781 addr = 'person@dom.ain'
2782 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
2783 latin1_quopri = "=?iso-8859-1?q?H=E4ns_W=FCrst?= <person@dom.ain>"
2784 self.assertEqual(utils.formataddr((name, addr)), utf8_base64)
2785 self.assertEqual(utils.formataddr((name, addr), 'iso-8859-1'),
2786 latin1_quopri)
2787
2788 def test_accepts_any_charset_like_object(self):
2789 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2790 name = "H\u00e4ns W\u00fcrst"
2791 addr = 'person@dom.ain'
2792 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
2793 foobar = "FOOBAR"
2794 class CharsetMock:
2795 def header_encode(self, string):
2796 return foobar
2797 mock = CharsetMock()
2798 mock_expected = "%s <%s>" % (foobar, addr)
2799 self.assertEqual(utils.formataddr((name, addr), mock), mock_expected)
2800 self.assertEqual(utils.formataddr((name, addr), Charset('utf-8')),
2801 utf8_base64)
2802
2803 def test_invalid_charset_like_object_raises_error(self):
2804 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2805 name = "H\u00e4ns W\u00fcrst"
2806 addr = 'person@dom.ain'
2807 # A object without a header_encode method:
2808 bad_charset = object()
2809 self.assertRaises(AttributeError, utils.formataddr, (name, addr),
2810 bad_charset)
2811
2812 def test_unicode_address_raises_error(self):
2813 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2814 addr = 'pers\u00f6n@dom.in'
2815 self.assertRaises(UnicodeError, utils.formataddr, (None, addr))
2816 self.assertRaises(UnicodeError, utils.formataddr, ("Name", addr))
2817
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002818 def test_name_with_dot(self):
2819 x = 'John X. Doe <jxd@example.com>'
2820 y = '"John X. Doe" <jxd@example.com>'
2821 a, b = ('John X. Doe', 'jxd@example.com')
2822 self.assertEqual(utils.parseaddr(x), (a, b))
2823 self.assertEqual(utils.parseaddr(y), (a, b))
2824 # formataddr() quotes the name if there's a dot in it
2825 self.assertEqual(utils.formataddr((a, b)), y)
2826
R. David Murray5397e862010-10-02 15:58:26 +00002827 def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
2828 # issue 10005. Note that in the third test the second pair of
2829 # backslashes is not actually a quoted pair because it is not inside a
2830 # comment or quoted string: the address being parsed has a quoted
2831 # string containing a quoted backslash, followed by 'example' and two
2832 # backslashes, followed by another quoted string containing a space and
2833 # the word 'example'. parseaddr copies those two backslashes
2834 # literally. Per rfc5322 this is not technically correct since a \ may
2835 # not appear in an address outside of a quoted string. It is probably
2836 # a sensible Postel interpretation, though.
2837 eq = self.assertEqual
2838 eq(utils.parseaddr('""example" example"@example.com'),
2839 ('', '""example" example"@example.com'))
2840 eq(utils.parseaddr('"\\"example\\" example"@example.com'),
2841 ('', '"\\"example\\" example"@example.com'))
2842 eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
2843 ('', '"\\\\"example\\\\" example"@example.com'))
2844
R. David Murray63563cd2010-12-18 18:25:38 +00002845 def test_parseaddr_preserves_spaces_in_local_part(self):
2846 # issue 9286. A normal RFC5322 local part should not contain any
2847 # folding white space, but legacy local parts can (they are a sequence
2848 # of atoms, not dotatoms). On the other hand we strip whitespace from
2849 # before the @ and around dots, on the assumption that the whitespace
2850 # around the punctuation is a mistake in what would otherwise be
2851 # an RFC5322 local part. Leading whitespace is, usual, stripped as well.
2852 self.assertEqual(('', "merwok wok@xample.com"),
2853 utils.parseaddr("merwok wok@xample.com"))
2854 self.assertEqual(('', "merwok wok@xample.com"),
2855 utils.parseaddr("merwok wok@xample.com"))
2856 self.assertEqual(('', "merwok wok@xample.com"),
2857 utils.parseaddr(" merwok wok @xample.com"))
2858 self.assertEqual(('', 'merwok"wok" wok@xample.com'),
2859 utils.parseaddr('merwok"wok" wok@xample.com'))
2860 self.assertEqual(('', 'merwok.wok.wok@xample.com'),
2861 utils.parseaddr('merwok. wok . wok@xample.com'))
2862
R David Murrayb53319f2012-03-14 15:31:47 -04002863 def test_formataddr_does_not_quote_parens_in_quoted_string(self):
2864 addr = ("'foo@example.com' (foo@example.com)",
2865 'foo@example.com')
2866 addrstr = ('"\'foo@example.com\' '
2867 '(foo@example.com)" <foo@example.com>')
2868 self.assertEqual(utils.parseaddr(addrstr), addr)
2869 self.assertEqual(utils.formataddr(addr), addrstr)
2870
2871
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002872 def test_multiline_from_comment(self):
2873 x = """\
2874Foo
2875\tBar <foo@example.com>"""
2876 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
2877
2878 def test_quote_dump(self):
2879 self.assertEqual(
2880 utils.formataddr(('A Silly; Person', 'person@dom.ain')),
2881 r'"A Silly; Person" <person@dom.ain>')
2882
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002883 def test_charset_richcomparisons(self):
2884 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002885 ne = self.assertNotEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002886 cset1 = Charset()
2887 cset2 = Charset()
2888 eq(cset1, 'us-ascii')
2889 eq(cset1, 'US-ASCII')
2890 eq(cset1, 'Us-AsCiI')
2891 eq('us-ascii', cset1)
2892 eq('US-ASCII', cset1)
2893 eq('Us-AsCiI', cset1)
2894 ne(cset1, 'usascii')
2895 ne(cset1, 'USASCII')
2896 ne(cset1, 'UsAsCiI')
2897 ne('usascii', cset1)
2898 ne('USASCII', cset1)
2899 ne('UsAsCiI', cset1)
2900 eq(cset1, cset2)
2901 eq(cset2, cset1)
2902
2903 def test_getaddresses(self):
2904 eq = self.assertEqual
2905 eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
2906 'Bud Person <bperson@dom.ain>']),
2907 [('Al Person', 'aperson@dom.ain'),
2908 ('Bud Person', 'bperson@dom.ain')])
2909
2910 def test_getaddresses_nasty(self):
2911 eq = self.assertEqual
2912 eq(utils.getaddresses(['foo: ;']), [('', '')])
2913 eq(utils.getaddresses(
2914 ['[]*-- =~$']),
2915 [('', ''), ('', ''), ('', '*--')])
2916 eq(utils.getaddresses(
2917 ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
2918 [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
2919
2920 def test_getaddresses_embedded_comment(self):
2921 """Test proper handling of a nested comment"""
2922 eq = self.assertEqual
2923 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
2924 eq(addrs[0][1], 'foo@bar.com')
2925
2926 def test_utils_quote_unquote(self):
2927 eq = self.assertEqual
2928 msg = Message()
2929 msg.add_header('content-disposition', 'attachment',
2930 filename='foo\\wacky"name')
2931 eq(msg.get_filename(), 'foo\\wacky"name')
2932
2933 def test_get_body_encoding_with_bogus_charset(self):
2934 charset = Charset('not a charset')
2935 self.assertEqual(charset.get_body_encoding(), 'base64')
2936
2937 def test_get_body_encoding_with_uppercase_charset(self):
2938 eq = self.assertEqual
2939 msg = Message()
2940 msg['Content-Type'] = 'text/plain; charset=UTF-8'
2941 eq(msg['content-type'], 'text/plain; charset=UTF-8')
2942 charsets = msg.get_charsets()
2943 eq(len(charsets), 1)
2944 eq(charsets[0], 'utf-8')
2945 charset = Charset(charsets[0])
2946 eq(charset.get_body_encoding(), 'base64')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00002947 msg.set_payload(b'hello world', charset=charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002948 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
2949 eq(msg.get_payload(decode=True), b'hello world')
2950 eq(msg['content-transfer-encoding'], 'base64')
2951 # Try another one
2952 msg = Message()
2953 msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
2954 charsets = msg.get_charsets()
2955 eq(len(charsets), 1)
2956 eq(charsets[0], 'us-ascii')
2957 charset = Charset(charsets[0])
2958 eq(charset.get_body_encoding(), encoders.encode_7or8bit)
2959 msg.set_payload('hello world', charset=charset)
2960 eq(msg.get_payload(), 'hello world')
2961 eq(msg['content-transfer-encoding'], '7bit')
2962
2963 def test_charsets_case_insensitive(self):
2964 lc = Charset('us-ascii')
2965 uc = Charset('US-ASCII')
2966 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
2967
2968 def test_partial_falls_inside_message_delivery_status(self):
2969 eq = self.ndiffAssertEqual
2970 # The Parser interface provides chunks of data to FeedParser in 8192
2971 # byte gulps. SF bug #1076485 found one of those chunks inside
2972 # message/delivery-status header block, which triggered an
2973 # unreadline() of NeedMoreData.
2974 msg = self._msgobj('msg_43.txt')
2975 sfp = StringIO()
2976 iterators._structure(msg, sfp)
2977 eq(sfp.getvalue(), """\
2978multipart/report
2979 text/plain
2980 message/delivery-status
2981 text/plain
2982 text/plain
2983 text/plain
2984 text/plain
2985 text/plain
2986 text/plain
2987 text/plain
2988 text/plain
2989 text/plain
2990 text/plain
2991 text/plain
2992 text/plain
2993 text/plain
2994 text/plain
2995 text/plain
2996 text/plain
2997 text/plain
2998 text/plain
2999 text/plain
3000 text/plain
3001 text/plain
3002 text/plain
3003 text/plain
3004 text/plain
3005 text/plain
3006 text/plain
3007 text/rfc822-headers
3008""")
3009
R. David Murraya0b44b52010-12-02 21:47:19 +00003010 def test_make_msgid_domain(self):
3011 self.assertEqual(
3012 email.utils.make_msgid(domain='testdomain-string')[-19:],
3013 '@testdomain-string>')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003014
Ezio Melottib3aedd42010-11-20 19:04:17 +00003015
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003016# Test the iterator/generators
class TestIterators(TestEmailBase):
    # Checks of the helpers in email.iterators, plus the line buffering
    # that FeedParser uses underneath.

    def test_body_line_iterator(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # First a simple non-multipart message
        msg = self._msgobj('msg_01.txt')
        it = iterators.body_line_iterator(msg)
        lines = list(it)
        eq(len(lines), 6)
        neq(EMPTYSTRING.join(lines), msg.get_payload())
        # Now a more complicated multipart
        msg = self._msgobj('msg_02.txt')
        it = iterators.body_line_iterator(msg)
        lines = list(it)
        eq(len(lines), 43)
        with openfile('msg_19.txt') as fp:
            neq(EMPTYSTRING.join(lines), fp.read())

    def test_typed_subpart_iterator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_04.txt')
        it = iterators.typed_subpart_iterator(msg, 'text')
        # One payload per matching subpart, so the list length is also the
        # number of subparts the iterator produced.
        lines = [subpart.get_payload() for subpart in it]
        eq(len(lines), 2)
        eq(EMPTYSTRING.join(lines), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_03.txt')
        it = iterators.typed_subpart_iterator(msg, 'text', 'plain')
        lines = [subpart.get_payload() for subpart in it]
        eq(len(lines), 1)
        eq(EMPTYSTRING.join(lines), """\

Hi,

Do you like this message?

-Me
""")

    def test_pushCR_LF(self):
        """FeedParser BufferedSubFile.push() assumed it received complete
        line endings.  A CR ending one push() followed by a LF starting
        the next push() added an empty line.
        """
        # Each entry is (chunk to push, number of lines expected to become
        # readable as a result of that push).
        imt = [
            ("a\r \n", 2),
            ("b", 0),
            ("c\n", 1),
            ("", 0),
            ("d\r\n", 1),
            ("e\r", 0),
            ("\nf", 1),
            ("\r\n", 1),
            ]
        from email.feedparser import BufferedSubFile, NeedMoreData
        bsf = BufferedSubFile()
        om = []
        nt = 0
        for il, n in imt:
            bsf.push(il)
            nt += n
            n1 = 0
            while True:
                ol = bsf.readline()
                # readline() hands back the NeedMoreData marker once nothing
                # more can be read; test for it by identity.
                if ol is NeedMoreData:
                    break
                om.append(ol)
                n1 += 1
            self.assertEqual(n, n1)
        self.assertEqual(len(om), nt)
        # Nothing may be lost or added: the lines read back must concatenate
        # to exactly what was pushed.
        self.assertEqual(''.join(il for il, n in imt), ''.join(om))
3103
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003104
Ezio Melottib3aedd42010-11-20 19:04:17 +00003105
class TestParsers(TestEmailBase):
    # Parser-level tests: header-only parsing, whitespace continuation
    # lines, CRLF handling and RFC 2822 header-name syntax.

    # No limit on the size of the diff unittest prints on a failure.
    maxDiff = None

    def test_header_parser(self):
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt') as fp:
            msg = HeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        # The body is not parsed, so it stays one string payload.
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)

    def test_bytes_header_parser(self):
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt', 'rb') as fp:
            msg = email.parser.BytesHeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)
        self.assertIsInstance(msg.get_payload(decode=True), bytes)

    def test_whitespace_continuation(self):
        eq = self.assertEqual
        # This message contains a line after the Subject: header that has only
        # whitespace, but it is not empty!
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: the next line has a space on it
\x20
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        eq = self.assertEqual
        # Like the previous test, but the subject line is the last
        # header.
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam
Subject: the next line has a space on it
\x20

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        eq = self.assertEqual
        # newline='\n' keeps the CRLF pairs of the fixture intact on read.
        with openfile('msg_26.txt', newline='\n') as fp:
            msg = Parser().parse(fp)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'text/plain')
        eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'application/riscos')

    def test_crlf_flatten(self):
        # Using newline='\n' preserves the crlfs in this input file.
        with openfile('msg_26.txt', newline='\n') as fp:
            text = fp.read()
        msg = email.message_from_string(text)
        s = StringIO()
        g = Generator(s)
        g.flatten(msg, linesep='\r\n')
        self.assertEqual(s.getvalue(), text)

    def test_multipart_digest_with_extra_mime_headers(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        eq(msg.is_multipart(), 1)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'message/rfc822')
        eq(part1.is_multipart(), 1)
        eq(len(part1.get_payload()), 1)
        part1a = part1.get_payload(0)
        eq(part1a.is_multipart(), 0)
        eq(part1a.get_content_type(), 'text/plain')
        neq(part1a.get_payload(), 'message 1\n')
        # next message/rfc822
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'message/rfc822')
        eq(part2.is_multipart(), 1)
        eq(len(part2.get_payload()), 1)
        part2a = part2.get_payload(0)
        eq(part2a.is_multipart(), 0)
        eq(part2a.get_content_type(), 'text/plain')
        neq(part2a.get_payload(), 'message 2\n')

    def test_three_lines(self):
        # A bug report by Andrew McNamara
        # Three header lines with no trailing newline and no body.
        lines = ['From: Andrew Person <aperson@dom.ain',
                 'Subject: Test',
                 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
        msg = email.message_from_string(NL.join(lines))
        self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        eq = self.assertEqual
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        value1 = 'text'
        value2 = 'more text'
        m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            value1, value2)
        msg = email.message_from_string(m)
        eq(msg.get('Header'), value1)
        eq(msg.get('Next-Header'), value2)

    def test_rfc2822_header_syntax(self):
        eq = self.assertEqual
        m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg), 3)
        eq(sorted(field for field in msg), ['!"#QUX;~', '>From', 'From'])
        eq(msg.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        eq = self.assertEqual
        # The first line has a space before any colon; no header at all
        # is expected to be recognized.
        m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg.keys()), 0)

    def test_rfc2822_one_character_header(self):
        eq = self.assertEqual
        m = 'A: first header\nB: second header\nCC: third header\n\nbody'
        msg = email.message_from_string(m)
        eq(sorted(msg.keys()), ['A', 'B', 'CC'])
        eq(msg.get_payload(), 'body')

    def test_CRLFLF_at_end_of_part(self):
        # issue 5610: feedparser should not eat two chars from body part ending
        # with "\r\n\n".
        m = (
            "From: foo@bar.com\n"
            "To: baz\n"
            "Mime-Version: 1.0\n"
            "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
            "\n"
            "--BOUNDARY\n"
            "Content-Type: text/plain\n"
            "\n"
            "body ending with CRLF newline\r\n"
            "\n"
            "--BOUNDARY--\n"
          )
        msg = email.message_from_string(m)
        self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003281
Ezio Melottib3aedd42010-11-20 19:04:17 +00003282
R. David Murray96fd54e2010-10-08 15:55:28 +00003283class Test8BitBytesHandling(unittest.TestCase):
3284 # In Python3 all input is string, but that doesn't work if the actual input
3285 # uses an 8bit transfer encoding. To hack around that, in email 5.1 we
3286 # decode byte streams using the surrogateescape error handler, and
3287 # reconvert to binary at appropriate places if we detect surrogates. This
3288 # doesn't allow us to transform headers with 8bit bytes (they get munged),
3289 # but it does allow us to parse and preserve them, and to decode body
3290 # parts that use an 8bit CTE.
3291
3292 bodytest_msg = textwrap.dedent("""\
3293 From: foo@bar.com
3294 To: baz
3295 Mime-Version: 1.0
3296 Content-Type: text/plain; charset={charset}
3297 Content-Transfer-Encoding: {cte}
3298
3299 {bodyline}
3300 """)
3301
3302 def test_known_8bit_CTE(self):
3303 m = self.bodytest_msg.format(charset='utf-8',
3304 cte='8bit',
3305 bodyline='pöstal').encode('utf-8')
3306 msg = email.message_from_bytes(m)
3307 self.assertEqual(msg.get_payload(), "pöstal\n")
3308 self.assertEqual(msg.get_payload(decode=True),
3309 "pöstal\n".encode('utf-8'))
3310
3311 def test_unknown_8bit_CTE(self):
3312 m = self.bodytest_msg.format(charset='notavalidcharset',
3313 cte='8bit',
3314 bodyline='pöstal').encode('utf-8')
3315 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00003316 self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n")
R. David Murray96fd54e2010-10-08 15:55:28 +00003317 self.assertEqual(msg.get_payload(decode=True),
3318 "pöstal\n".encode('utf-8'))
3319
3320 def test_8bit_in_quopri_body(self):
3321 # This is non-RFC compliant data...without 'decode' the library code
3322 # decodes the body using the charset from the headers, and because the
3323 # source byte really is utf-8 this works. This is likely to fail
3324 # against real dirty data (ie: produce mojibake), but the data is
3325 # invalid anyway so it is as good a guess as any. But this means that
3326 # this test just confirms the current behavior; that behavior is not
3327 # necessarily the best possible behavior. With 'decode' it is
3328 # returning the raw bytes, so that test should be of correct behavior,
3329 # or at least produce the same result that email4 did.
3330 m = self.bodytest_msg.format(charset='utf-8',
3331 cte='quoted-printable',
3332 bodyline='p=C3=B6stál').encode('utf-8')
3333 msg = email.message_from_bytes(m)
3334 self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
3335 self.assertEqual(msg.get_payload(decode=True),
3336 'pöstál\n'.encode('utf-8'))
3337
3338 def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
3339 # This is similar to the previous test, but proves that if the 8bit
3340 # byte is undecodeable in the specified charset, it gets replaced
3341 # by the unicode 'unknown' character. Again, this may or may not
3342 # be the ideal behavior. Note that if decode=False none of the
3343 # decoders will get involved, so this is the only test we need
3344 # for this behavior.
3345 m = self.bodytest_msg.format(charset='ascii',
3346 cte='quoted-printable',
3347 bodyline='p=C3=B6stál').encode('utf-8')
3348 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00003349 self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
R. David Murray96fd54e2010-10-08 15:55:28 +00003350 self.assertEqual(msg.get_payload(decode=True),
3351 'pöstál\n'.encode('utf-8'))
3352
R David Murray80e0aee2012-05-27 21:23:34 -04003353 # test_defect_handling:test_invalid_chars_in_base64_payload
R. David Murray96fd54e2010-10-08 15:55:28 +00003354 def test_8bit_in_base64_body(self):
R David Murray80e0aee2012-05-27 21:23:34 -04003355 # If we get 8bit bytes in a base64 body, we can just ignore them
3356 # as being outside the base64 alphabet and decode anyway. But
3357 # we register a defect.
R. David Murray96fd54e2010-10-08 15:55:28 +00003358 m = self.bodytest_msg.format(charset='utf-8',
3359 cte='base64',
3360 bodyline='cMO2c3RhbAá=').encode('utf-8')
3361 msg = email.message_from_bytes(m)
3362 self.assertEqual(msg.get_payload(decode=True),
R David Murray80e0aee2012-05-27 21:23:34 -04003363 'pöstal'.encode('utf-8'))
3364 self.assertIsInstance(msg.defects[0],
3365 errors.InvalidBase64CharactersDefect)
R. David Murray96fd54e2010-10-08 15:55:28 +00003366
3367 def test_8bit_in_uuencode_body(self):
3368 # Sticking an 8bit byte in a uuencode block makes it undecodable by
3369 # normal means, so the block is returned undecoded, but as bytes.
3370 m = self.bodytest_msg.format(charset='utf-8',
3371 cte='uuencode',
3372 bodyline='<,.V<W1A; á ').encode('utf-8')
3373 msg = email.message_from_bytes(m)
3374 self.assertEqual(msg.get_payload(decode=True),
3375 '<,.V<W1A; á \n'.encode('utf-8'))
3376
3377
R. David Murray92532142011-01-07 23:25:30 +00003378 headertest_headers = (
3379 ('From: foo@bar.com', ('From', 'foo@bar.com')),
3380 ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
3381 ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
3382 '\tJean de Baddie',
3383 ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
3384 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
3385 ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
3386 ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
3387 )
3388 headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) +
3389 '\nYes, they are flying.\n').encode('utf-8')
R. David Murray96fd54e2010-10-08 15:55:28 +00003390
3391 def test_get_8bit_header(self):
3392 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003393 self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz')
3394 self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz')
R. David Murray96fd54e2010-10-08 15:55:28 +00003395
3396 def test_print_8bit_headers(self):
3397 msg = email.message_from_bytes(self.headertest_msg)
3398 self.assertEqual(str(msg),
R. David Murray92532142011-01-07 23:25:30 +00003399 textwrap.dedent("""\
3400 From: {}
3401 To: {}
3402 Subject: {}
3403 From: {}
3404
3405 Yes, they are flying.
3406 """).format(*[expected[1] for (_, expected) in
3407 self.headertest_headers]))
R. David Murray96fd54e2010-10-08 15:55:28 +00003408
3409 def test_values_with_8bit_headers(self):
3410 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003411 self.assertListEqual([str(x) for x in msg.values()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003412 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003413 'b\uFFFD\uFFFDz',
3414 'Maintenant je vous pr\uFFFD\uFFFDsente mon '
3415 'coll\uFFFD\uFFFDgue, le pouf '
3416 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
R. David Murray96fd54e2010-10-08 15:55:28 +00003417 '\tJean de Baddie',
R. David Murray92532142011-01-07 23:25:30 +00003418 "g\uFFFD\uFFFDst"])
R. David Murray96fd54e2010-10-08 15:55:28 +00003419
3420 def test_items_with_8bit_headers(self):
3421 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003422 self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003423 [('From', 'foo@bar.com'),
R. David Murray92532142011-01-07 23:25:30 +00003424 ('To', 'b\uFFFD\uFFFDz'),
3425 ('Subject', 'Maintenant je vous '
3426 'pr\uFFFD\uFFFDsente '
3427 'mon coll\uFFFD\uFFFDgue, le pouf '
3428 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
3429 '\tJean de Baddie'),
3430 ('From', 'g\uFFFD\uFFFDst')])
R. David Murray96fd54e2010-10-08 15:55:28 +00003431
3432 def test_get_all_with_8bit_headers(self):
3433 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003434 self.assertListEqual([str(x) for x in msg.get_all('from')],
R. David Murray96fd54e2010-10-08 15:55:28 +00003435 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003436 'g\uFFFD\uFFFDst'])
R. David Murray96fd54e2010-10-08 15:55:28 +00003437
R David Murraya2150232011-03-16 21:11:23 -04003438 def test_get_content_type_with_8bit(self):
3439 msg = email.message_from_bytes(textwrap.dedent("""\
3440 Content-Type: text/pl\xA7in; charset=utf-8
3441 """).encode('latin-1'))
3442 self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin")
3443 self.assertEqual(msg.get_content_maintype(), "text")
3444 self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin")
3445
R David Murray97f43c02012-06-24 05:03:27 -04003446 # test_headerregistry.TestContentTypeHeader.non_ascii_in_params
R David Murraya2150232011-03-16 21:11:23 -04003447 def test_get_params_with_8bit(self):
3448 msg = email.message_from_bytes(
3449 'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1'))
3450 self.assertEqual(msg.get_params(header='x-header'),
3451 [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])
3452 self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne')
3453 # XXX: someday you might be able to get 'b\xa7r', for now you can't.
3454 self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None)
3455
R David Murray97f43c02012-06-24 05:03:27 -04003456 # test_headerregistry.TestContentTypeHeader.non_ascii_in_rfc2231_value
R David Murraya2150232011-03-16 21:11:23 -04003457 def test_get_rfc2231_params_with_8bit(self):
3458 msg = email.message_from_bytes(textwrap.dedent("""\
3459 Content-Type: text/plain; charset=us-ascii;
3460 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3461 ).encode('latin-1'))
3462 self.assertEqual(msg.get_param('title'),
3463 ('us-ascii', 'en', 'This is not f\uFFFDn'))
3464
3465 def test_set_rfc2231_params_with_8bit(self):
3466 msg = email.message_from_bytes(textwrap.dedent("""\
3467 Content-Type: text/plain; charset=us-ascii;
3468 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3469 ).encode('latin-1'))
3470 msg.set_param('title', 'test')
3471 self.assertEqual(msg.get_param('title'), 'test')
3472
3473 def test_del_rfc2231_params_with_8bit(self):
3474 msg = email.message_from_bytes(textwrap.dedent("""\
3475 Content-Type: text/plain; charset=us-ascii;
3476 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3477 ).encode('latin-1'))
3478 msg.del_param('title')
3479 self.assertEqual(msg.get_param('title'), None)
3480 self.assertEqual(msg.get_content_maintype(), 'text')
3481
3482 def test_get_payload_with_8bit_cte_header(self):
3483 msg = email.message_from_bytes(textwrap.dedent("""\
3484 Content-Transfer-Encoding: b\xa7se64
3485 Content-Type: text/plain; charset=latin-1
3486
3487 payload
3488 """).encode('latin-1'))
3489 self.assertEqual(msg.get_payload(), 'payload\n')
3490 self.assertEqual(msg.get_payload(decode=True), b'payload\n')
3491
R. David Murray96fd54e2010-10-08 15:55:28 +00003492 non_latin_bin_msg = textwrap.dedent("""\
3493 From: foo@bar.com
3494 To: báz
3495 Subject: Maintenant je vous présente mon collègue, le pouf célèbre
3496 \tJean de Baddie
3497 Mime-Version: 1.0
3498 Content-Type: text/plain; charset="utf-8"
3499 Content-Transfer-Encoding: 8bit
3500
3501 Да, они летят.
3502 """).encode('utf-8')
3503
3504 def test_bytes_generator(self):
3505 msg = email.message_from_bytes(self.non_latin_bin_msg)
3506 out = BytesIO()
3507 email.generator.BytesGenerator(out).flatten(msg)
3508 self.assertEqual(out.getvalue(), self.non_latin_bin_msg)
3509
R. David Murray7372a072011-01-26 21:21:32 +00003510 def test_bytes_generator_handles_None_body(self):
3511 #Issue 11019
3512 msg = email.message.Message()
3513 out = BytesIO()
3514 email.generator.BytesGenerator(out).flatten(msg)
3515 self.assertEqual(out.getvalue(), b"\n")
3516
R. David Murray92532142011-01-07 23:25:30 +00003517 non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\
R. David Murray96fd54e2010-10-08 15:55:28 +00003518 From: foo@bar.com
R. David Murray92532142011-01-07 23:25:30 +00003519 To: =?unknown-8bit?q?b=C3=A1z?=
3520 Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=
3521 =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=
3522 =?unknown-8bit?q?_Jean_de_Baddie?=
R. David Murray96fd54e2010-10-08 15:55:28 +00003523 Mime-Version: 1.0
3524 Content-Type: text/plain; charset="utf-8"
3525 Content-Transfer-Encoding: base64
3526
3527 0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
3528 """)
3529
3530 def test_generator_handles_8bit(self):
3531 msg = email.message_from_bytes(self.non_latin_bin_msg)
3532 out = StringIO()
3533 email.generator.Generator(out).flatten(msg)
R. David Murray92532142011-01-07 23:25:30 +00003534 self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped)
R. David Murray96fd54e2010-10-08 15:55:28 +00003535
3536 def test_bytes_generator_with_unix_from(self):
3537 # The unixfrom contains a current date, so we can't check it
3538 # literally. Just make sure the first word is 'From' and the
3539 # rest of the message matches the input.
3540 msg = email.message_from_bytes(self.non_latin_bin_msg)
3541 out = BytesIO()
3542 email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
3543 lines = out.getvalue().split(b'\n')
3544 self.assertEqual(lines[0].split()[0], b'From')
3545 self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)
3546
R. David Murray92532142011-01-07 23:25:30 +00003547 non_latin_bin_msg_as7bit = non_latin_bin_msg_as7bit_wrapped.split('\n')
3548 non_latin_bin_msg_as7bit[2:4] = [
3549 'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
3550 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
3551 non_latin_bin_msg_as7bit = '\n'.join(non_latin_bin_msg_as7bit)
3552
R. David Murray96fd54e2010-10-08 15:55:28 +00003553 def test_message_from_binary_file(self):
3554 fn = 'test.msg'
3555 self.addCleanup(unlink, fn)
3556 with open(fn, 'wb') as testfile:
3557 testfile.write(self.non_latin_bin_msg)
Brett Cannon384917a2010-10-29 23:08:36 +00003558 with open(fn, 'rb') as testfile:
3559 m = email.parser.BytesParser().parse(testfile)
R. David Murray96fd54e2010-10-08 15:55:28 +00003560 self.assertEqual(str(m), self.non_latin_bin_msg_as7bit)
3561
3562 latin_bin_msg = textwrap.dedent("""\
3563 From: foo@bar.com
3564 To: Dinsdale
3565 Subject: Nudge nudge, wink, wink
3566 Mime-Version: 1.0
3567 Content-Type: text/plain; charset="latin-1"
3568 Content-Transfer-Encoding: 8bit
3569
3570 oh là là, know what I mean, know what I mean?
3571 """).encode('latin-1')
3572
3573 latin_bin_msg_as7bit = textwrap.dedent("""\
3574 From: foo@bar.com
3575 To: Dinsdale
3576 Subject: Nudge nudge, wink, wink
3577 Mime-Version: 1.0
3578 Content-Type: text/plain; charset="iso-8859-1"
3579 Content-Transfer-Encoding: quoted-printable
3580
3581 oh l=E0 l=E0, know what I mean, know what I mean?
3582 """)
3583
3584 def test_string_generator_reencodes_to_quopri_when_appropriate(self):
3585 m = email.message_from_bytes(self.latin_bin_msg)
3586 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3587
3588 def test_decoded_generator_emits_unicode_body(self):
3589 m = email.message_from_bytes(self.latin_bin_msg)
3590 out = StringIO()
3591 email.generator.DecodedGenerator(out).flatten(m)
3592 #DecodedHeader output contains an extra blank line compared
3593 #to the input message. RDM: not sure if this is a bug or not,
3594 #but it is not specific to the 8bit->7bit conversion.
3595 self.assertEqual(out.getvalue(),
3596 self.latin_bin_msg.decode('latin-1')+'\n')
3597
3598 def test_bytes_feedparser(self):
3599 bfp = email.feedparser.BytesFeedParser()
3600 for i in range(0, len(self.latin_bin_msg), 10):
3601 bfp.feed(self.latin_bin_msg[i:i+10])
3602 m = bfp.close()
3603 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3604
R. David Murray8451c4b2010-10-23 22:19:56 +00003605 def test_crlf_flatten(self):
3606 with openfile('msg_26.txt', 'rb') as fp:
3607 text = fp.read()
3608 msg = email.message_from_bytes(text)
3609 s = BytesIO()
3610 g = email.generator.BytesGenerator(s)
3611 g.flatten(msg, linesep='\r\n')
3612 self.assertEqual(s.getvalue(), text)
R David Murrayc5c14722011-04-06 08:13:02 -04003613
3614 def test_8bit_multipart(self):
3615 # Issue 11605
3616 source = textwrap.dedent("""\
3617 Date: Fri, 18 Mar 2011 17:15:43 +0100
3618 To: foo@example.com
3619 From: foodwatch-Newsletter <bar@example.com>
3620 Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System
3621 Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain>
3622 MIME-Version: 1.0
3623 Content-Type: multipart/alternative;
3624 boundary="b1_76a486bee62b0d200f33dc2ca08220ad"
3625
3626 --b1_76a486bee62b0d200f33dc2ca08220ad
3627 Content-Type: text/plain; charset="utf-8"
3628 Content-Transfer-Encoding: 8bit
3629
3630 Guten Tag, ,
3631
3632 mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die
3633 Nachrichten aus Japan.
3634
3635
3636 --b1_76a486bee62b0d200f33dc2ca08220ad
3637 Content-Type: text/html; charset="utf-8"
3638 Content-Transfer-Encoding: 8bit
3639
3640 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
3641 "http://www.w3.org/TR/html4/loose.dtd">
3642 <html lang="de">
3643 <head>
3644 <title>foodwatch - Newsletter</title>
3645 </head>
3646 <body>
3647 <p>mit gro&szlig;er Betroffenheit verfolgen auch wir im foodwatch-Team
3648 die Nachrichten aus Japan.</p>
3649 </body>
3650 </html>
3651 --b1_76a486bee62b0d200f33dc2ca08220ad--
3652
3653 """).encode('utf-8')
3654 msg = email.message_from_bytes(source)
3655 s = BytesIO()
3656 g = email.generator.BytesGenerator(s)
3657 g.flatten(msg)
3658 self.assertEqual(s.getvalue(), source)
3659
R David Murray9fd170e2012-03-14 14:05:03 -04003660 def test_bytes_generator_b_encoding_linesep(self):
3661 # Issue 14062: b encoding was tacking on an extra \n.
3662 m = Message()
3663 # This has enough non-ascii that it should always end up b encoded.
3664 m['Subject'] = Header('žluťoučký kůň')
3665 s = BytesIO()
3666 g = email.generator.BytesGenerator(s)
3667 g.flatten(m, linesep='\r\n')
3668 self.assertEqual(
3669 s.getvalue(),
3670 b'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
3671
3672 def test_generator_b_encoding_linesep(self):
3673 # Since this broke in ByteGenerator, test Generator for completeness.
3674 m = Message()
3675 # This has enough non-ascii that it should always end up b encoded.
3676 m['Subject'] = Header('žluťoučký kůň')
3677 s = StringIO()
3678 g = email.generator.Generator(s)
3679 g.flatten(m, linesep='\r\n')
3680 self.assertEqual(
3681 s.getvalue(),
3682 'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
3683
R. David Murray8451c4b2010-10-23 22:19:56 +00003684 maxDiff = None
3685
Ezio Melottib3aedd42010-11-20 19:04:17 +00003686
class BaseTestBytesGeneratorIdempotent:
    # Mixin with the bytes flavour of the _msgobj()/_idempotent() hooks.
    # The class it is combined with must supply linesep, blinesep and
    # normalize_linesep_regex, as well as assertEqual().

    maxDiff = None

    def _msgobj(self, filename):
        # Read the fixture as bytes, rewrite its line endings to blinesep,
        # then return both the parsed message and those normalized bytes.
        with openfile(filename, 'rb') as fp:
            raw = fp.read()
        normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(normalized), normalized

    def _idempotent(self, msg, data, unixfrom=False):
        # Flatten msg without header wrapping (maxheaderlen=0) and require
        # the output to equal data exactly.
        sink = BytesIO()
        writer = email.generator.BytesGenerator(sink, maxheaderlen=0)
        writer.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertEqual(data, sink.getvalue())
R. David Murray96fd54e2010-10-08 15:55:28 +00003703
3704
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                     TestIdempotent):
    # LF configuration for the bytes round-trip helpers: every CRLF pair in
    # the fixture bytes is rewritten to a bare LF, and messages are
    # flattened with LF line endings.
    # NOTE(review): the tests themselves presumably come from
    # TestIdempotent, which is defined outside this section.
    normalize_linesep_regex = re.compile(br'\r\n')
    blinesep = b'\n'
    linesep = '\n'
3710
3711
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    # CRLF configuration for the bytes round-trip helpers: every LF that is
    # not already preceded by CR is rewritten to CRLF, and messages are
    # flattened with CRLF line endings.
    # NOTE(review): the tests themselves presumably come from
    # TestIdempotent, which is defined outside this section.
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
    blinesep = b'\r\n'
    linesep = '\r\n'
3717
Ezio Melottib3aedd42010-11-20 19:04:17 +00003718
class TestBase64(unittest.TestCase):
    # Checks for the helper functions of email.base64mime.

    def test_len(self):
        eq = self.assertEqual
        encoded = base64mime.body_encode(b'hello', eol='')
        eq(base64mime.header_length('hello'), len(encoded))
        for size in range(15):
            # Expected length: four characters for every started group of
            # three input characters (0 -> 0, 1..3 -> 4, 4..6 -> 8, ...).
            bsize = 4 * ((size + 2) // 3)
            eq(base64mime.header_length('x' * size), bsize)

    def test_decode(self):
        eq = self.assertEqual
        for encoded, expected in (('', b''),
                                  ('aGVsbG8=', b'hello')):
            eq(base64mime.decode(encoded), expected)

    def test_encode(self):
        eq = self.assertEqual
        encode = base64mime.body_encode
        eq(encode(b''), b'')
        eq(encode(b'hello'), 'aGVsbG8=\n')
        # Input that itself ends with a newline
        eq(encode(b'hello\n'), 'aGVsbG8K\n')
        sample = b'xxxx ' * 20
        # Test the maxlinelen arg
        eq(encode(sample, maxlinelen=40), """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IA==
""")
        # Test the eol argument
        eq(encode(sample, maxlinelen=40, eol='\r\n'),
           """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IA==\r
""")

    def test_header_encode(self):
        eq = self.assertEqual
        he = base64mime.header_encode
        eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        # Line breaks inside the text are encoded like any other character.
        eq(he('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # Test the charset option
        eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=')
        eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003769
3770
Ezio Melottib3aedd42010-11-20 19:04:17 +00003771
class TestQuopri(unittest.TestCase):
    """Tests for the quoted-printable helpers in email.quoprimime."""

    def setUp(self):
        # Set of characters (as byte integers) that don't need to be encoded
        # in headers.
        self.hlit = list(chain(
            range(ord('a'), ord('z') + 1),
            range(ord('A'), ord('Z') + 1),
            range(ord('0'), ord('9') + 1),
            (c for c in b'!*+-/')))
        # Set of characters (as byte integers) that do need to be encoded in
        # headers.
        self.hnon = [c for c in range(256) if c not in self.hlit]
        # Sanity check of the fixture itself.  Use an assertion method
        # rather than a bare assert, which is stripped when running with -O.
        self.assertEqual(len(self.hlit) + len(self.hnon), 256)
        # Set of characters (as byte integers) that don't need to be encoded
        # in bodies.
        self.blit = list(range(ord(' '), ord('~') + 1))
        self.blit.append(ord('\t'))
        self.blit.remove(ord('='))
        # Set of characters (as byte integers) that do need to be encoded in
        # bodies.
        self.bnon = [c for c in range(256) if c not in self.blit]
        self.assertEqual(len(self.blit) + len(self.bnon), 256)

    def test_quopri_header_check(self):
        for c in self.hlit:
            self.assertFalse(quoprimime.header_check(c),
                             'Should not be header quopri encoded: %s' % chr(c))
        for c in self.hnon:
            self.assertTrue(quoprimime.header_check(c),
                            'Should be header quopri encoded: %s' % chr(c))

    def test_quopri_body_check(self):
        for c in self.blit:
            self.assertFalse(quoprimime.body_check(c),
                             'Should not be body quopri encoded: %s' % chr(c))
        for c in self.bnon:
            self.assertTrue(quoprimime.body_check(c),
                            'Should be body quopri encoded: %s' % chr(c))

    def test_header_quopri_len(self):
        eq = self.assertEqual
        eq(quoprimime.header_length(b'hello'), 5)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
           quoprimime.header_length(b'hello') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
           quoprimime.header_length(b'h@e@l@l@o@') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        for c in self.hlit:
            eq(quoprimime.header_length(bytes([c])), 1,
               'expected length 1 for %r' % chr(c))
        for c in self.hnon:
            # Space is special; it's encoded to _
            if c == ord(' '):
                continue
            eq(quoprimime.header_length(bytes([c])), 3,
               'expected length 3 for %r' % chr(c))
        eq(quoprimime.header_length(b' '), 1)

    def test_body_quopri_len(self):
        eq = self.assertEqual
        for c in self.blit:
            eq(quoprimime.body_length(bytes([c])), 1)
        for c in self.bnon:
            eq(quoprimime.body_length(bytes([c])), 3)

    def test_quote_unquote_idempotent(self):
        for x in range(256):
            c = chr(x)
            self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)

    def _test_header_encode(self, header, expected_encoded_header, charset=None):
        # Helper: only pass charset through when the caller gave one, so
        # that header_encode()'s own default is exercised otherwise.
        if charset is None:
            encoded_header = quoprimime.header_encode(header)
        else:
            encoded_header = quoprimime.header_encode(header, charset)
        self.assertEqual(encoded_header, expected_encoded_header)

    def test_header_encode_null(self):
        self._test_header_encode(b'', '')

    def test_header_encode_one_word(self):
        self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=')

    def test_header_encode_two_lines(self):
        self._test_header_encode(b'hello\nworld',
                                 '=?iso-8859-1?q?hello=0Aworld?=')

    def test_header_encode_non_ascii(self):
        self._test_header_encode(b'hello\xc7there',
                                 '=?iso-8859-1?q?hello=C7there?=')

    def test_header_encode_alt_charset(self):
        self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
                                 charset='iso-8859-2')

    def _test_header_decode(self, encoded_header, expected_decoded_header):
        decoded_header = quoprimime.header_decode(encoded_header)
        self.assertEqual(decoded_header, expected_decoded_header)

    def test_header_decode_null(self):
        self._test_header_decode('', '')

    def test_header_decode_one_word(self):
        self._test_header_decode('hello', 'hello')

    def test_header_decode_two_lines(self):
        self._test_header_decode('hello=0Aworld', 'hello\nworld')

    def test_header_decode_non_ascii(self):
        self._test_header_decode('hello=C7there', 'hello\xc7there')

    def _test_decode(self, encoded, expected_decoded, eol=None):
        # Helper: only pass eol through when the caller gave one, so that
        # decode()'s own default is exercised otherwise.
        if eol is None:
            decoded = quoprimime.decode(encoded)
        else:
            decoded = quoprimime.decode(encoded, eol=eol)
        self.assertEqual(decoded, expected_decoded)

    def test_decode_null_word(self):
        self._test_decode('', '')

    def test_decode_null_line_null_word(self):
        self._test_decode('\r\n', '\n')

    def test_decode_one_word(self):
        self._test_decode('hello', 'hello')

    def test_decode_one_word_eol(self):
        self._test_decode('hello', 'hello', eol='X')

    def test_decode_one_line(self):
        self._test_decode('hello\r\n', 'hello\n')

    def test_decode_one_line_lf(self):
        self._test_decode('hello\n', 'hello\n')

    def test_decode_one_line_cr(self):
        self._test_decode('hello\r', 'hello\n')

    def test_decode_one_line_nl(self):
        self._test_decode('hello\n', 'helloX', eol='X')

    def test_decode_one_line_crnl(self):
        self._test_decode('hello\r\n', 'helloX', eol='X')

    def test_decode_one_line_one_word(self):
        self._test_decode('hello\r\nworld', 'hello\nworld')

    def test_decode_one_line_one_word_eol(self):
        self._test_decode('hello\r\nworld', 'helloXworld', eol='X')

    def test_decode_two_lines(self):
        self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n')

    def test_decode_two_lines_eol(self):
        self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X')

    def test_decode_one_long_line(self):
        self._test_decode('Spam' * 250, 'Spam' * 250)

    def test_decode_one_space(self):
        self._test_decode(' ', '')

    def test_decode_multiple_spaces(self):
        self._test_decode(' ' * 5, '')

    def test_decode_one_line_trailing_spaces(self):
        self._test_decode('hello \r\n', 'hello\n')

    def test_decode_two_lines_trailing_spaces(self):
        self._test_decode('hello \r\nworld \r\n', 'hello\nworld\n')

    def test_decode_quoted_word(self):
        self._test_decode('=22quoted=20words=22', '"quoted words"')

    def test_decode_uppercase_quoting(self):
        self._test_decode('ab=CD=EF', 'ab\xcd\xef')

    def test_decode_lowercase_quoting(self):
        self._test_decode('ab=cd=ef', 'ab\xcd\xef')

    def test_decode_soft_line_break(self):
        self._test_decode('soft line=\r\nbreak', 'soft linebreak')

    def test_decode_false_quoting(self):
        self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2')

    def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
        # Helper: encode body, compare against the expectation and, when the
        # eol is one splitlines() understands, verify no line is too long.
        kwargs = {}
        if maxlinelen is None:
            # Use body_encode's default.
            maxlinelen = 76
        else:
            kwargs['maxlinelen'] = maxlinelen
        if eol is None:
            # Use body_encode's default.
            eol = '\n'
        else:
            kwargs['eol'] = eol
        encoded_body = quoprimime.body_encode(body, **kwargs)
        self.assertEqual(encoded_body, expected_encoded_body)
        if eol == '\n' or eol == '\r\n':
            # We know how to split the result back into lines, so maxlinelen
            # can be checked.
            for line in encoded_body.splitlines():
                self.assertLessEqual(len(line), maxlinelen)

    def test_encode_null(self):
        self._test_encode('', '')

    def test_encode_null_lines(self):
        self._test_encode('\n\n', '\n\n')

    def test_encode_one_line(self):
        self._test_encode('hello\n', 'hello\n')

    def test_encode_one_line_crlf(self):
        self._test_encode('hello\r\n', 'hello\n')

    def test_encode_one_line_eol(self):
        self._test_encode('hello\n', 'hello\r\n', eol='\r\n')

    def test_encode_one_space(self):
        self._test_encode(' ', '=20')

    def test_encode_one_line_one_space(self):
        self._test_encode(' \n', '=20\n')

    # XXX: body_encode() expects strings, but uses ord(char) from these
    # strings to index into a 256-entry list.  For code points above 255,
    # this will fail.  Should there be a check for 8-bit only ord() values
    # in body, or at least a comment about the expected input?

    def test_encode_two_lines_one_space(self):
        self._test_encode(' \n \n', '=20\n=20\n')

    def test_encode_one_word_trailing_spaces(self):
        # Only the last character of a trailing whitespace run is quoted,
        # so two trailing spaces are needed to get a literal space followed
        # by '=20'.  The run is spelled with a multiplier so its length
        # cannot be lost to whitespace mangling.
        self._test_encode('hello' + 2 * ' ', 'hello =20')

    def test_encode_one_line_trailing_spaces(self):
        # Same as above, with a line ending after the whitespace run.
        self._test_encode('hello' + 2 * ' ' + '\n', 'hello =20\n')

    def test_encode_one_word_trailing_tab(self):
        self._test_encode('hello \t', 'hello =09')

    def test_encode_one_line_trailing_tab(self):
        self._test_encode('hello \t\n', 'hello =09\n')

    def test_encode_trailing_space_before_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6)

    def test_encode_trailing_space_at_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5)

    def test_encode_trailing_space_beyond_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4)

    def test_encode_whitespace_lines(self):
        self._test_encode(' \n' * 5, '=20\n' * 5)

    def test_encode_quoted_equals(self):
        self._test_encode('a = b', 'a =3D b')

    def test_encode_one_long_string(self):
        self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25)

    def test_encode_one_long_line(self):
        self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')

    def test_encode_one_very_long_line(self):
        self._test_encode('x' * 200 + '\n',
                          2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n')

    def test_encode_shortest_maxlinelen(self):
        self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4)

    def test_encode_maxlinelen_too_small(self):
        self.assertRaises(ValueError, self._test_encode, '', '', maxlinelen=3)

    def test_encode(self):
        eq = self.assertEqual
        eq(quoprimime.body_encode(''), '')
        eq(quoprimime.body_encode('hello'), 'hello')
        # CRLF line endings in the input come out as the default eol
        eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
        # Test the maxlinelen arg
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
x xxxx xxxx xxxx xxxx=20""")
        # Test the eol argument
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
x xxxx xxxx xxxx xxxx=20""")
        eq(quoprimime.body_encode("""\
one line

two line"""), """\
one line

two line""")
4084
4085
Ezio Melottib3aedd42010-11-20 19:04:17 +00004086
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004087# Test the Charset class
class TestCharset(unittest.TestCase):
    def tearDown(self):
        # test_body_encode registers a 'fake' charset in the module-level
        # registry; drop it again (if present) so tests stay independent.
        from email import charset as CharsetModule
        CharsetModule.CHARSETS.pop('fake', None)

    def test_codec_encodeable(self):
        # Make sure us-ascii = no Unicode conversion
        ascii_charset = Charset('us-ascii')
        self.assertEqual(ascii_charset.header_encode('Hello World!'),
                         'Hello World!')
        # Test 8-bit idempotency with us-ascii
        eight_bit = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
        self.assertRaises(UnicodeError, ascii_charset.header_encode, eight_bit)
        utf8_charset = Charset('utf-8')
        self.assertEqual(utf8_charset.header_encode(eight_bit),
                         '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')

    def test_body_encode(self):
        # Try a charset with QP body encoding
        qp_charset = Charset('iso-8859-1')
        self.assertEqual('hello w=F6rld',
                         qp_charset.body_encode('hello w\xf6rld'))
        # Try a charset with Base64 body encoding
        b64_charset = Charset('utf-8')
        self.assertEqual('aGVsbG8gd29ybGQ=\n',
                         b64_charset.body_encode(b'hello world'))
        # Try a charset with None body encoding
        plain_charset = Charset('us-ascii')
        self.assertEqual('hello world',
                         plain_charset.body_encode('hello world'))
        # Try the convert argument, where input codec != output codec
        Charset('euc-jp')
        # With apologies to Tokio Kikuchi ;)
        # XXX FIXME
##         try:
##             eq('\x1b$B5FCO;~IW\x1b(B',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
##             eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
##         except LookupError:
##             # We probably don't have the Japanese codecs installed
##             pass
        # Testing SF bug #625509, which we have to fake, since there are no
        # built-in encodings where the header encoding is QP but the body
        # encoding is not.
        from email import charset as CharsetModule
        CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8')
        fake_charset = Charset('fake')
        self.assertEqual('hello world',
                         fake_charset.body_encode('hello world'))

    def test_unicode_charset_name(self):
        ascii_charset = Charset('us-ascii')
        self.assertEqual(str(ascii_charset), 'us-ascii')
        # A charset name that is not pure ASCII is rejected outright.
        self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
4142
4143
Ezio Melottib3aedd42010-11-20 19:04:17 +00004144
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004145# Test multilingual MIME headers.
class TestHeader(TestEmailBase):
    def test_simple(self):
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        h.append(' Goodbye World!')
        # Chunks that share a charset are joined with a single space, and
        # the appended chunk's own leading space is kept, so two spaces
        # separate the sentences.  The run is spelled with a multiplier so
        # its length cannot be lost to whitespace mangling.
        eq(h.encode(), 'Hello World!' + 2 * ' ' + 'Goodbye World!')

    def test_simple_surprise(self):
        # The "surprise": a separating space appears even though the
        # appended chunk has no leading whitespace of its own.
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        h.append('Goodbye World!')
        eq(h.encode(), 'Hello World! Goodbye World!')

    def test_header_needs_no_decoding(self):
        h = 'no decoding needed'
        self.assertEqual(decode_header(h), [(h, None)])

    def test_long(self):
        h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
                   maxlinelen=76)
        # Every folded line must respect maxlinelen; assertLessEqual reports
        # the offending length on failure.
        for line in h.encode(splitchars=' ').split('\n '):
            self.assertLessEqual(len(line), 76)

    def test_multilingual(self):
        eq = self.ndiffAssertEqual
        g = Charset("iso-8859-1")
        cz = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        g_head = (b'Die Mieter treten hier ein werden mit einem '
                  b'Foerderband komfortabel den Korridor entlang, '
                  b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
                  b'gegen die rotierenden Klingen bef\xf6rdert. ')
        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        h = Header(g_head, g)
        h.append(cz_head, cz)
        h.append(utf8_head, utf8)
        enc = h.encode(maxlinelen=76)
        eq(enc, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
        decoded = decode_header(enc)
        eq(len(decoded), 3)
        eq(decoded[0], (g_head, 'iso-8859-1'))
        eq(decoded[1], (cz_head, 'iso-8859-2'))
        eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
        ustr = str(h)
        eq(ustr,
           (b'Die Mieter treten hier ein werden mit einem Foerderband '
            b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
            b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
            b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
            b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
            b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
            b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
            b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
            b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
            b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
            b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
            b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
            b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
            b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
            b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
            b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
            ).decode('utf-8'))
        # Test make_header()
        newh = make_header(decode_header(enc))
        eq(newh, h)

    def test_empty_header_encode(self):
        h = Header()
        self.assertEqual(h.encode(), '')

    def test_header_ctor_default_args(self):
        eq = self.ndiffAssertEqual
        h = Header()
        eq(h, '')
        h.append('foo', Charset('iso-8859-1'))
        eq(h, 'foo')

    def test_explicit_maxlinelen(self):
        eq = self.ndiffAssertEqual
        hstr = ('A very long line that must get split to something other '
                'than at the 76th character boundary to test the non-default '
                'behavior')
        h = Header(hstr)
        eq(h.encode(), '''\
A very long line that must get split to something other than at the 76th
 character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        # Passing header_name moves the fold point of the first line.
        h = Header(hstr, header_name='Subject')
        eq(h.encode(), '''\
A very long line that must get split to something other than at the
 76th character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        h = Header(hstr, maxlinelen=1024, header_name='Subject')
        eq(h.encode(), hstr)
        eq(str(h), hstr)

    def test_quopri_splittable(self):
        eq = self.ndiffAssertEqual
        h = Header(charset='iso-8859-1', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_?=""")
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='iso-8859-1', maxlinelen=40)
        # Same text as above, now folded at 40 columns.
        h.append(x)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
 =?iso-8859-1?q?_xxxx_xxxx_?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_base64_splittable(self):
        eq = self.ndiffAssertEqual
        h = Header(charset='koi8-r', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IA==?=""")
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='koi8-r', maxlinelen=40)
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_us_ascii_header(self):
        eq = self.assertEqual
        s = 'hello'
        x = decode_header(s)
        eq(x, [('hello', None)])
        h = make_header(x)
        eq(s, h.encode())

    def test_string_charset(self):
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        eq(h, 'hello')

##    def test_unicode_error(self):
##        raises = self.assertRaises
##        raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
##        h = Header()
##        raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')

    def test_utf8_shortest(self):
        eq = self.assertEqual
        h = Header('p\xf6stal', 'utf-8')
        eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
        h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
        eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')

    def test_bad_8bit_header(self):
        raises = self.assertRaises
        eq = self.assertEqual
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        raises(UnicodeError, Header, x)
        h = Header()
        raises(UnicodeError, h.append, x)
        # With errors='replace' the undecodable byte is substituted instead
        # of raising.
        e = x.decode('utf-8', 'replace')
        eq(str(Header(x, errors='replace')), e)
        h.append(x, errors='replace')
        eq(str(h), e)

    def test_escaped_8bit_header(self):
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        e = x.decode('ascii', 'surrogateescape')
        h = Header(e, charset=email.charset.UNKNOWN8BIT)
        self.assertEqual(str(h),
                         'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])

    def test_header_handles_binary_unknown8bit(self):
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        h = Header(x, charset=email.charset.UNKNOWN8BIT)
        self.assertEqual(str(h),
                         'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])

    def test_make_header_handles_binary_unknown8bit(self):
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        h = Header(x, charset=email.charset.UNKNOWN8BIT)
        h2 = email.header.make_header(email.header.decode_header(h))
        self.assertEqual(str(h2),
                         'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h2), [(x, 'unknown-8bit')])

    def test_modify_returned_list_does_not_change_header(self):
        h = Header('test')
        chunks = email.header.decode_header(h)
        # Mutating the returned list must not leak back into the Header.
        chunks.append(('ascii', 'test2'))
        self.assertEqual(str(h), 'test')

    def test_encoded_adjacent_nonencoded(self):
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        h.append('world')
        s = h.encode()
        eq(s, '=?iso-8859-1?q?hello?= world')
        h = make_header(decode_header(s))
        eq(h.encode(), s)

    def test_whitespace_keeper(self):
        eq = self.assertEqual
        s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
        parts = decode_header(s)
        # The two adjacent koi8-r encoded words come back as one chunk; the
        # unencoded text on either side keeps its whitespace.
        eq(parts, [(b'Subject: ', None),
                   (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'),
                   (b' zz.', None)])
        hdr = make_header(parts)
        eq(hdr.encode(),
           'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')

    def test_broken_base64_header(self):
        raises = self.assertRaises
        s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
        raises(errors.HeaderParseError, decode_header, s)

    def test_shift_jis_charset(self):
        h = Header('文', charset='shift_jis')
        self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')

    def test_flatten_header_with_no_value(self):
        # Issue 11401 (regression from email 4.x)  Note that the space after
        # the header doesn't reflect the input, but this is also the way
        # email 4.x behaved.  At some point it would be nice to fix that.
        msg = email.message_from_string("EmptyHeader:")
        self.assertEqual(str(msg), "EmptyHeader: \n\n")

    def test_encode_preserves_leading_ws_on_value(self):
        msg = Message()
        # Build the expectation from the value itself so the preserved
        # leading whitespace is checked exactly, whatever its width.
        value = 3 * ' ' + 'value with leading ws'
        msg['SomeHeader'] = value
        self.assertEqual(str(msg), "SomeHeader: " + value + "\n\n")
4497
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004498
Ezio Melottib3aedd42010-11-20 19:04:17 +00004499
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004500# Test RFC 2231 header parameters (en/de)coding
class TestRFC2231(TestEmailBase):
    # Exercises reading and writing of RFC 2231 style parameters (extended
    # ``name*=charset'lang'value`` values and ``name*N`` continuations) via
    # Message.get_param/set_param/del_param and the helpers built on them.
    #
    # NOTE(review): the bare ``test_headerregistry.…`` comments above some
    # tests appear to name the equivalent tests in test_headerregistry --
    # confirm before relying on them.

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes
    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes
    def test_get_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_29.txt')
        eq(msg.get_param('title'),
           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        eq(msg.get_param('title', unquote=False),
           ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))

    def test_set_param(self):
        eq = self.ndiffAssertEqual
        msg = Message()
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii')
        eq(msg.get_param('title'),
           ('us-ascii', '', 'This is even more ***fun*** isn\'t it!'))
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        eq(msg.get_param('title'),
           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        msg = self._msgobj('msg_01.txt')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        # NOTE(review): the expected text has to match the flattened
        # msg_01.txt byte for byte, including any runs of spaces -- confirm
        # against the data file.
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    def test_set_param_requote(self):
        msg = Message()
        msg.set_param('title', 'foo')
        self.assertEqual(msg['content-type'], 'text/plain; title="foo"')
        msg.set_param('title', 'bar', requote=False)
        self.assertEqual(msg['content-type'], 'text/plain; title=bar')
        # tspecial is still quoted.
        msg.set_param('title', "(bar)bell", requote=False)
        self.assertEqual(msg['content-type'], 'text/plain; title="(bar)bell"')

    def test_del_param(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        msg.set_param('foo', 'bar', charset='us-ascii', language='en')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        msg.del_param('foo', header='Content-Type')
        # NOTE(review): the expected text has to match the flattened
        # msg_01.txt byte for byte, including any runs of spaces -- confirm
        # against the data file.
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_charset
    # I changed the charset name, though, because the one in the file isn't
    # a legal charset name.  Should add a test for an illegal charset.
    def test_rfc2231_get_content_charset(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_32.txt')
        eq(msg.get_content_charset(), 'us-ascii')

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_double_quotes
    def test_rfc2231_parse_rfc_quoting(self):
        m = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*=''This%20is%20even%20more%20;
            \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        # The message must also flatten back to exactly the input text.
        self.assertEqual(m, msg.as_string())

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes
    def test_rfc2231_parse_extra_quoting(self):
        m = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*="''This%20is%20even%20more%20";
            \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        # The message must also flatten back to exactly the input text.
        self.assertEqual(m, msg.as_string())

    # test_headerregistry.TestContentTypeHeader.rfc2231_no_language_or_charset
    # but new test uses *0* because otherwise lang/charset is not valid.
    # test_headerregistry.TestContentTypeHeader.rfc2231_segmented_normal_values
    def test_rfc2231_no_language_or_charset(self):
        m = '''\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm

'''
        msg = email.message_from_string(m)
        param = msg.get_param('NAME')
        # Segments without a '*' suffix carry no charset/language, so the
        # value must come back as a plain string rather than a 3-tuple.
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(
            param,
            'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_charset
    def test_rfc2231_no_language_or_charset_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    # Duplicate of previous test?
    def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    # test_headerregistry.TestContentTypeHeader.rfc2231_partly_encoded,
    # but the test below is wrong (the first part should be decoded).
    def test_rfc2231_partly_encoded(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20***fun*** is it not.pdf')

    def test_rfc2231_partly_nonencoded(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0="This%20is%20even%20more%20";
\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_boundary(self):
        m = '''\
Content-Type: multipart/alternative;
\tboundary*0*="''This%20is%20even%20more%20";
\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tboundary*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_boundary(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_charset(self):
        # This is a nonsensical charset value, but tests the code anyway
        m = '''\
Content-Type: text/plain;
\tcharset*0*="This%20is%20even%20more%20";
\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tcharset*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_content_charset(),
                         'this is even more ***fun*** is it not.pdf')

    # test_headerregistry.TestContentTypeHeader.rfc2231_unknown_charset_treated_as_ascii
    def test_rfc2231_bad_encoding_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_charset(self):
        m = """\
Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D

"""
        msg = email.message_from_string(m)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertIsNone(msg.get_content_charset())

    def test_rfc2231_bad_character_in_charset(self):
        m = """\
Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D

"""
        msg = email.message_from_string(m)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertIsNone(msg.get_content_charset())

    def test_rfc2231_bad_character_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2*="is it not.pdf%E2"

'''
        msg = email.message_from_string(m)
        # The trailing %E2 byte is expected to come back as U+FFFD.
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf\ufffd')

    def test_rfc2231_unknown_encoding(self):
        m = """\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt

"""
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(), 'myfile.txt')

    def test_rfc2231_single_tick_in_filename_extended(self):
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        self.assertIsNone(charset)
        self.assertIsNone(language)
        self.assertEqual(s, "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes
    def test_rfc2231_single_tick_in_filename(self):
        m = """\
Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(m)
        param = msg.get_param('name')
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(param, "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_value_with_charset_and_lang
    def test_rfc2231_tick_attack_extended(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_non_encoded_value
    def test_rfc2231_tick_attack(self):
        m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(m)
        param = msg.get_param('name')
        # Non-extended segments: the charset'lang' prefix is ordinary text.
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(param, "us-ascii'en-us'Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quotes_inside_quotes
    def test_rfc2231_no_extended_values(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo; name=\"Frank's Document\"

"""
        msg = email.message_from_string(m)
        eq(msg.get_param('name'), "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_then_unencoded_segments
    def test_rfc2231_encoded_then_unencoded_segments(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'My\";
\tname*1=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, 'My Document For You')

    # test_headerregistry.TestContentTypeHeader.rfc2231_unencoded_then_encoded_segments
    # test_headerregistry.TestContentTypeHeader.rfc2231_quoted_unencoded_then_encoded_segments
    def test_rfc2231_unencoded_then_encoded_segments(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'My\";
\tname*1*=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, 'My Document For You')
4868
4869
Ezio Melottib3aedd42010-11-20 19:04:17 +00004870
R. David Murraya8f480f2010-01-16 18:30:03 +00004871# Tests to ensure that signed parts of an email are completely preserved, as
4872# required by RFC1847 section 2.1. Note that these are incomplete, because the
4873# email package does not currently always preserve the body. See issue 1670765.
class TestSigned(TestEmailBase):
    # Each test parses msg_45.txt, regenerates it through one of the
    # flattening entry points, and checks that the first MIME part of the
    # output is identical to the first MIME part of the input.

    # Captures the text between the first boundary line ("--<boundary>") and
    # the next line that repeats that same boundary.  Compiled once for the
    # class instead of on every comparison.
    _first_part_re = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)

    def _msg_and_obj(self, filename):
        # Return (raw text of the test data file, Message parsed from it).
        with openfile(filename) as fp:
            original = fp.read()
        msg = email.message_from_string(original)
        return original, msg

    def _first_part(self, text):
        # Return the body of the first MIME part of *text*; fail the test
        # with a readable message if no boundary-delimited part is present.
        match = self._first_part_re.search(text)
        self.assertIsNotNone(match, 'no boundary-delimited MIME part found')
        return match.group(2)

    def _signed_parts_eq(self, original, result):
        # Extract the first mime part of each message and compare them.
        inpart = self._first_part(original)
        outpart = self._first_part(result)
        self.assertEqual(outpart, inpart)

    def test_long_headers_as_string(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)
4906
4907
Ezio Melottib3aedd42010-11-20 19:04:17 +00004908
# Run the module's tests when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()