blob: 36c344f846be8f8550880ca06b06296ca22ce098 [file] [log] [blame]
Benjamin Peterson46a99002010-01-09 18:45:30 +00001# Copyright (C) 2001-2010 Python Software Foundation
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002# Contact: email-sig@python.org
3# email package unit tests
4
5import os
R. David Murray719a4492010-11-21 16:53:48 +00006import re
Guido van Rossum8b3febe2007-08-30 01:15:14 +00007import sys
8import time
9import base64
10import difflib
11import unittest
12import warnings
R. David Murray96fd54e2010-10-08 15:55:28 +000013import textwrap
Guido van Rossum8b3febe2007-08-30 01:15:14 +000014
R. David Murray96fd54e2010-10-08 15:55:28 +000015from io import StringIO, BytesIO
Guido van Rossum8b3febe2007-08-30 01:15:14 +000016from itertools import chain
17
18import email
R David Murrayc27e5222012-05-25 15:01:48 -040019import email.policy
Guido van Rossum8b3febe2007-08-30 01:15:14 +000020
21from email.charset import Charset
22from email.header import Header, decode_header, make_header
23from email.parser import Parser, HeaderParser
R David Murray638d40b2012-08-24 11:14:13 -040024from email.generator import Generator, DecodedGenerator, BytesGenerator
Guido van Rossum8b3febe2007-08-30 01:15:14 +000025from email.message import Message
26from email.mime.application import MIMEApplication
27from email.mime.audio import MIMEAudio
28from email.mime.text import MIMEText
29from email.mime.image import MIMEImage
30from email.mime.base import MIMEBase
31from email.mime.message import MIMEMessage
32from email.mime.multipart import MIMEMultipart
33from email import utils
34from email import errors
35from email import encoders
36from email import iterators
37from email import base64mime
38from email import quoprimime
39
R David Murray28346b82011-03-31 11:40:20 -040040from test.support import run_unittest, unlink
R David Murraya256bac2011-03-31 12:20:23 -040041from test.test_email import openfile, TestEmailBase
Guido van Rossum8b3febe2007-08-30 01:15:14 +000042
# Small string constants shared by the tests in this module; NL is the
# separator used to re-join message lines after splitting them.
NL = '\n'
EMPTYSTRING = ''
SPACE = ' '
46
47
Guido van Rossum8b3febe2007-08-30 01:15:14 +000048# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    """Exercise the public API of email.message.Message.

    The tests cover header lookup, charset handling, payload decoding,
    parameter access on structured headers, content-type queries and
    header creation through add_header().  Fixture messages are loaded
    with self._msgobj() and openfile().
    """

    def test_get_all(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        # A missing header yields the supplied fallback object.
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        self.assertIsNone(msg.get_charset())
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        self.assertIsNone(msg.get_charset())
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_get_charsets(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        # An explicit fallback replaces None in the result.
        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_make_boundary(self):
        msg = MIMEMultipart('form-data')
        # Note that when the boundary gets created is an implementation
        # detail and might change.
        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
        # Trigger creation of boundary
        msg.as_string()
        self.assertEqual(msg.items()[0][1][:33],
                         'multipart/form-data; boundary="==')
        # XXX: there ought to be tests of the uniqueness of the boundary, too.

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        with openfile('msg_46.txt') as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        # Keyword arguments make the generator options self-describing.
        gen = Generator(out, mangle_from_=True, maxheaderlen=0)
        gen.flatten(msg, unixfrom=False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_byte_message_rfc822_only(self):
        # Make sure new bytes header parser also passes this.
        with openfile('msg_46.txt', 'rb') as fp:
            msgdata = fp.read()
        parser = email.parser.BytesHeaderParser()
        msg = parser.parsebytes(msgdata)
        out = BytesIO()
        gen = email.generator.BytesGenerator(out)
        gen.flatten(msg)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        self.assertIsNone(msg.get_payload(decode=True))
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            # Assigning a header appends a new field and lookups return the
            # first match, so the previous value has to be removed first;
            # otherwise only the first spelling would ever be exercised.
            # Deleting a header that is not present is a no-op.
            del msg['content-transfer-encoding']
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_get_payload_n_raises_on_non_multipart(self):
        msg = Message()
        self.assertRaises(TypeError, msg.get_payload, 1)

    def test_decoded_generator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        self.assertIn('from', msg)
        self.assertIn('From', msg)
        self.assertIn('FROM', msg)
        self.assertIn('to', msg)
        self.assertIn('To', msg)
        self.assertIn('TO', msg)

    def test_as_string(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        eq(text, str(msg))
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        # With unixfrom=True the first line is the envelope header; the
        # remainder must match the original text.
        self.assertTrue(lines[0].startswith('From '))
        eq(text, NL.join(lines[1:]))

    # test_headerregistry.TestContentTypeHeader.bad_params
    def test_bad_param(self):
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_filename())

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_boundary())

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        # There is no Content-Type header in this message.
        self.assertIsNone(msg.get_params())
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    # test_headerregistry.TestContentTypeHeader.spaces_around_param_equals
    def test_get_param_liberal(self):
        msg = Message()
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        self.assertIsNone(msg.get_param('quuz', header='x-header'))
        self.assertIsNone(msg.get_param('quuz'))
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    # test_headerregistry.TestContentTypeHeader.spaces_around_semis
    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    # test_headerregistry.TestContentTypeHeader.semis_inside_quotes
    def test_get_param_with_semis_in_quotes(self):
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    # test_headerregistry.TestContentTypeHeader.quotes_inside_rfc2231_value
    def test_get_param_with_quotes(self):
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        msg = email.message_from_string('Header: exists')
        self.assertIn('header', msg)
        self.assertIn('Header', msg)
        self.assertIn('HEADER', msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        # Re-adding the parameter puts it at the end of the list.
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_del_param_on_nonexistent_header(self):
        # There is nothing to assert here: the test passes as long as
        # del_param() does not raise when the header is absent.
        msg = Message()
        msg.del_param('filename', 'content-disposition')

    def test_del_nonexistent_param(self):
        msg = Message()
        msg.add_header('Content-Type', 'text/plain', charset='utf-8')
        existing_header = msg['Content-Type']
        msg.del_param('foobar', header='Content-Type')
        # Deleting an unknown parameter must leave the header untouched.
        self.assertEqual(msg['Content-Type'], existing_header)
        self.assertEqual(msg['Content-Type'], 'text/plain; charset="utf-8"')

    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        # A type without a '/' is rejected.
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # Changing the type keeps the existing parameters.
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        # With a duplicated field only the first occurrence is replaced.
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    # test_defect_handling:test_invalid_chars_in_base64_payload
    def test_broken_base64_payload(self):
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         (b'\x03\x00\xe9\xd0\xfe\xff\xff.\x8b\xc0'
                          b'\xa1\x00p\xf6\xbf\xe9\x0f'))
        self.assertIsInstance(msg.defects[0],
                              errors.InvalidBase64CharactersDefect)

    def test_broken_unicode_payload(self):
        # This test improves coverage but is not a compliance test.
        # The behavior in this situation is currently undefined by the API.
        x = 'this is a br\xf6ken thing to do'
        msg = Message()
        msg['content-type'] = 'text/plain'
        msg['content-transfer-encoding'] = '8bit'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    def test_questionable_bytes_payload(self):
        # This test improves coverage but is not a compliance test,
        # since it involves poking inside the black box.
        x = 'this is a quéstionable thing to do'.encode('utf-8')
        msg = Message()
        msg['content-type'] = 'text/plain; charset="utf-8"'
        msg['content-transfer-encoding'] = '8bit'
        msg._payload = x
        self.assertEqual(msg.get_payload(decode=True), x)

    # Issue 1078919
    def test_ascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    def test_noascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer.ppt")
        self.assertEqual(
            'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
            msg['Content-Disposition'])

    def test_nonascii_add_header_via_triple(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fußballer.ppt'))
        self.assertEqual(
            'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
            msg['Content-Disposition'])

    def test_ascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="windows [filename].ppt")
        self.assertEqual(
            'attachment; filename="windows [filename].ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer [filename].ppt")
        self.assertEqual(
            "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
            msg['Content-Disposition'])

    def test_add_header_with_name_only_param(self):
        msg = Message()
        # A None value produces a bare parameter name; the underscore in
        # the keyword is rendered as a dash.
        msg.add_header('Content-Disposition', 'inline', foo_bar=None)
        self.assertEqual("inline; foo-bar", msg['Content-Disposition'])

    def test_add_header_with_no_value(self):
        msg = Message()
        msg.add_header('X-Status', None)
        self.assertEqual('', msg['X-Status'])

    # Issue 5871: reject an attempt to embed a header inside a header value
    # (header injection attack).
    def test_embeded_header_via_Header_rejected(self):
        msg = Message()
        msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_embeded_header_via_string_rejected(self):
        msg = Message()
        msg['Dummy'] = 'dummy\nX-Injected-Header: test'
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_unicode_header_defaults_to_utf8_encoding(self):
        # Issue 14291
        m = MIMEText('abc\n')
        m['Subject'] = 'É test'
        self.assertEqual(str(m), textwrap.dedent("""\
            Content-Type: text/plain; charset="us-ascii"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 7bit
            Subject: =?utf-8?q?=C3=89_test?=

            abc
            """))

    def test_unicode_body_defaults_to_utf8_encoding(self):
        # Issue 14291
        m = MIMEText('É testabc\n')
        self.assertEqual(str(m), textwrap.dedent("""\
            Content-Type: text/plain; charset="utf-8"
            MIME-Version: 1.0
            Content-Transfer-Encoding: base64

            w4kgdGVzdGFiYwo=
            """))
640
641
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000642# Test the email.encoders module
class TestEncoders(unittest.TestCase):
    """Checks of the Content-Transfer-Encoding chosen for various payloads."""

    def test_EncodersEncode_base64(self):
        with openfile('PyBanner048.gif', 'rb') as image_file:
            raw_image = image_file.read()
        part = email.mime.image.MIMEImage(raw_image)
        # the transfer-encoded body lines should all be <=76 characters
        longest = max(map(len, part.get_payload().split('\n')))
        self.assertLessEqual(longest, 76)

    def test_encode_empty_payload(self):
        message = Message()
        message.set_charset('us-ascii')
        self.assertEqual(message['content-transfer-encoding'], '7bit')

    def test_default_cte(self):
        # 7bit data and the default us-ascii _charset
        plain = MIMEText('hello world')
        self.assertEqual(plain['content-transfer-encoding'], '7bit')
        # Similar, but with 8bit data
        eight_bit = MIMEText('hello \xf8 world')
        self.assertEqual(eight_bit['content-transfer-encoding'], 'base64')
        # And now with a different charset
        latin1 = MIMEText('hello \xf8 world', _charset='iso-8859-1')
        self.assertEqual(latin1['content-transfer-encoding'],
                         'quoted-printable')

    def test_encode7or8bit(self):
        # Make sure a charset whose input character set is 8bit but
        # whose output character set is 7bit gets a transfer-encoding
        # of 7bit.
        message = MIMEText('文', _charset='euc-jp')
        self.assertEqual(message['content-transfer-encoding'], '7bit')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000679
Ezio Melottib3aedd42010-11-20 19:04:17 +0000680
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000681# Test long header wrapping
682class TestLongHeaders(TestEmailBase):
R David Murray01581ee2011-04-18 10:04:34 -0400683
684 maxDiff = None
685
def test_split_long_continuation(self):
    # A message whose header has an overlong continuation line is parsed
    # and flattened again; the output is expected to equal the input text.
    source_text = """\
Subject: bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text

test
"""
    parsed = email.message_from_string(source_text)
    buffer = StringIO()
    Generator(buffer).flatten(parsed)
    self.ndiffAssertEqual(buffer.getvalue(), source_text)
705
def test_another_long_almost_unsplittable_header(self):
    # The header value already contains line breaks followed by whitespace
    # and one line that is far too long; encode() is expected to return the
    # text unchanged, both with tab and with space continuation whitespace.
    tabbed = """\
bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text"""
    spaced = tabbed.replace('\t', ' ')

    tab_header = Header(tabbed, continuation_ws='\t')
    self.ndiffAssertEqual(tab_header.encode(), tabbed)

    space_header = Header(spaced)
    self.ndiffAssertEqual(space_header.encode(), spaced)
722
def test_long_nonstring(self):
    # One header is assembled from three chunks, each in its own charset,
    # and its folding is checked twice: once as the Subject of a flattened
    # message and once through Header.encode() with an explicit maxlinelen.
    check = self.ndiffAssertEqual
    latin1 = Charset("iso-8859-1")
    latin2 = Charset("iso-8859-2")
    unicode_charset = Charset("utf-8")
    latin1_chunk = (
        b'Die Mieter treten hier ein werden mit einem Foerderband '
        b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
        b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
        b'bef\xf6rdert. '
    )
    latin2_chunk = (
        b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
        b'd\xf9vtipu.. '
    )
    unicode_chunk = (
        '\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
        '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
        '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
        '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
        '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
        'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
        'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
        '\u3044\u307e\u3059\u3002'
    )
    subject = Header(latin1_chunk, latin1, header_name='Subject')
    subject.append(latin2_chunk, latin2)
    subject.append(unicode_chunk, unicode_charset)

    message = Message()
    message['Subject'] = subject
    buffer = StringIO()
    Generator(buffer).flatten(message)
    check(buffer.getvalue(), """\
Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
 =?utf-8?b?44CC?=

""")
    check(subject.encode(maxlinelen=76), """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000776
777 def test_long_header_encode(self):
778 eq = self.ndiffAssertEqual
779 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
780 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
781 header_name='X-Foobar-Spoink-Defrobnit')
782 eq(h.encode(), '''\
783wasnipoop; giraffes="very-long-necked-animals";
784 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
785
786 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
787 eq = self.ndiffAssertEqual
788 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
789 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
790 header_name='X-Foobar-Spoink-Defrobnit',
791 continuation_ws='\t')
792 eq(h.encode(), '''\
793wasnipoop; giraffes="very-long-necked-animals";
794 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
795
796 def test_long_header_encode_with_tab_continuation(self):
797 eq = self.ndiffAssertEqual
798 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
799 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
800 header_name='X-Foobar-Spoink-Defrobnit',
801 continuation_ws='\t')
802 eq(h.encode(), '''\
803wasnipoop; giraffes="very-long-necked-animals";
804\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
805
R David Murray3a6152f2011-03-14 21:13:03 -0400806 def test_header_encode_with_different_output_charset(self):
807 h = Header('文', 'euc-jp')
808 self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=")
809
810 def test_long_header_encode_with_different_output_charset(self):
811 h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4'
812 b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4'
813 b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4'
814 b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp')
815 res = """\
816=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?=
817 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?="""
818 self.assertEqual(h.encode(), res)
819
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000820 def test_header_splitter(self):
821 eq = self.ndiffAssertEqual
822 msg = MIMEText('')
823 # It'd be great if we could use add_header() here, but that doesn't
824 # guarantee an order of the parameters.
825 msg['X-Foobar-Spoink-Defrobnit'] = (
826 'wasnipoop; giraffes="very-long-necked-animals"; '
827 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
828 sfp = StringIO()
829 g = Generator(sfp)
830 g.flatten(msg)
831 eq(sfp.getvalue(), '''\
832Content-Type: text/plain; charset="us-ascii"
833MIME-Version: 1.0
834Content-Transfer-Encoding: 7bit
835X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
836 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
837
838''')
839
def test_no_semis_header_splitter(self):
    # Ten message ids separated only by blanks (no semicolons): the
    # expected References header carries five ids on each of two lines.
    message = Message()
    message['From'] = 'test@dom.ain'
    ids = ['<%d@dom.ain>' % i for i in range(10)]
    message['References'] = SPACE.join(ids)
    message.set_payload('Test')
    out = StringIO()
    Generator(out).flatten(message)
    self.ndiffAssertEqual(out.getvalue(), """\
From: test@dom.ain
References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>

Test""")
855
R David Murray7da4db12011-04-07 20:37:17 -0400856 def test_last_split_chunk_does_not_fit(self):
857 eq = self.ndiffAssertEqual
858 h = Header('Subject: the first part of this is short, but_the_second'
859 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
860 '_all_by_itself')
861 eq(h.encode(), """\
862Subject: the first part of this is short,
863 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
864
865 def test_splittable_leading_char_followed_by_overlong_unsplitable(self):
866 eq = self.ndiffAssertEqual
867 h = Header(', but_the_second'
868 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
869 '_all_by_itself')
870 eq(h.encode(), """\
871,
872 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
873
874 def test_multiple_splittable_leading_char_followed_by_overlong_unsplitable(self):
875 eq = self.ndiffAssertEqual
876 h = Header(', , but_the_second'
877 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
878 '_all_by_itself')
879 eq(h.encode(), """\
880, ,
881 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
882
883 def test_trailing_splitable_on_overlong_unsplitable(self):
884 eq = self.ndiffAssertEqual
885 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
886 'be_on_a_line_all_by_itself;')
887 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_should_"
888 "be_on_a_line_all_by_itself;")
889
890 def test_trailing_splitable_on_overlong_unsplitable_with_leading_splitable(self):
891 eq = self.ndiffAssertEqual
892 h = Header('; '
893 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
R David Murray01581ee2011-04-18 10:04:34 -0400894 'be_on_a_line_all_by_itself; ')
R David Murray7da4db12011-04-07 20:37:17 -0400895 eq(h.encode(), """\
896;
R David Murray01581ee2011-04-18 10:04:34 -0400897 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
R David Murray7da4db12011-04-07 20:37:17 -0400898
R David Murraye1292a22011-04-07 20:54:03 -0400899 def test_long_header_with_multiple_sequential_split_chars(self):
R David Murraye1292a22011-04-07 20:54:03 -0400900 eq = self.ndiffAssertEqual
901 h = Header('This is a long line that has two whitespaces in a row. '
902 'This used to cause truncation of the header when folded')
903 eq(h.encode(), """\
904This is a long line that has two whitespaces in a row. This used to cause
905 truncation of the header when folded""")
906
R David Murray01581ee2011-04-18 10:04:34 -0400907 def test_splitter_split_on_punctuation_only_if_fws(self):
908 eq = self.ndiffAssertEqual
909 h = Header('thisverylongheaderhas;semicolons;and,commas,but'
910 'they;arenotlegal;fold,points')
911 eq(h.encode(), "thisverylongheaderhas;semicolons;and,commas,butthey;"
912 "arenotlegal;fold,points")
913
914 def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self):
915 eq = self.ndiffAssertEqual
916 h = Header('this is a test where we need to have more than one line '
917 'before; our final line that is just too big to fit;; '
918 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
919 'be_on_a_line_all_by_itself;')
920 eq(h.encode(), """\
921this is a test where we need to have more than one line before;
922 our final line that is just too big to fit;;
923 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""")
924
925 def test_overlong_last_part_followed_by_split_point(self):
926 eq = self.ndiffAssertEqual
927 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
928 'be_on_a_line_all_by_itself ')
929 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_"
930 "should_be_on_a_line_all_by_itself ")
931
932 def test_multiline_with_overlong_parts_separated_by_two_split_points(self):
933 eq = self.ndiffAssertEqual
934 h = Header('this_is_a__test_where_we_need_to_have_more_than_one_line_'
935 'before_our_final_line_; ; '
936 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
937 'be_on_a_line_all_by_itself; ')
938 eq(h.encode(), """\
939this_is_a__test_where_we_need_to_have_more_than_one_line_before_our_final_line_;
940 ;
941 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
942
943 def test_multiline_with_overlong_last_part_followed_by_split_point(self):
944 eq = self.ndiffAssertEqual
945 h = Header('this is a test where we need to have more than one line '
946 'before our final line; ; '
947 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
948 'be_on_a_line_all_by_itself; ')
949 eq(h.encode(), """\
950this is a test where we need to have more than one line before our final line;
951 ;
952 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
953
def test_long_header_with_whitespace_runs(self):
    # Message ids separated by runs of several blanks: the runs are
    # expected to survive folding, including the trailing pair.
    # NOTE(review): run widths here (two blanks after each id, hence three
    # between ids once joined) are reconstructed from the \x20\x20 tail
    # and the 4/4/2 ids-per-line layout of the expected text; confirm
    # against the upstream file.
    message = Message()
    message['From'] = 'test@dom.ain'
    message['References'] = SPACE.join(['<foo@dom.ain>  '] * 10)
    message.set_payload('Test')
    out = StringIO()
    Generator(out).flatten(message)
    self.ndiffAssertEqual(out.getvalue(), """\
From: test@dom.ain
References: <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>
   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>
   <foo@dom.ain>   <foo@dom.ain>\x20\x20

Test""")
970
def test_long_run_with_semi_header_splitter(self):
    # Ten blank-separated ids followed by '; abc': the expected folds all
    # fall between ids, and '; abc' stays attached to the last id.
    message = Message()
    message['From'] = 'test@dom.ain'
    id_run = SPACE.join(['<foo@dom.ain>'] * 10)
    message['References'] = id_run + '; abc'
    message.set_payload('Test')
    out = StringIO()
    Generator(out).flatten(message)
    self.ndiffAssertEqual(out.getvalue(), """\
From: test@dom.ain
References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
 <foo@dom.ain>; abc

Test""")
987
988 def test_splitter_split_on_punctuation_only_if_fws(self):
989 eq = self.ndiffAssertEqual
990 msg = Message()
991 msg['From'] = 'test@dom.ain'
992 msg['References'] = ('thisverylongheaderhas;semicolons;and,commas,but'
993 'they;arenotlegal;fold,points')
994 msg.set_payload('Test')
995 sfp = StringIO()
996 g = Generator(sfp)
997 g.flatten(msg)
998 # XXX the space after the header should not be there.
999 eq(sfp.getvalue(), """\
1000From: test@dom.ain
1001References:\x20
1002 thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points
1003
1004Test""")
1005
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001006 def test_no_split_long_header(self):
1007 eq = self.ndiffAssertEqual
1008 hstr = 'References: ' + 'x' * 80
Guido van Rossum9604e662007-08-30 03:46:43 +00001009 h = Header(hstr)
1010 # These come on two lines because Headers are really field value
1011 # classes and don't really know about their field names.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001012 eq(h.encode(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +00001013References:
1014 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
1015 h = Header('x' * 80)
1016 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001017
1018 def test_splitting_multiple_long_lines(self):
1019 eq = self.ndiffAssertEqual
1020 hstr = """\
1021from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1022\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1023\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1024"""
1025 h = Header(hstr, continuation_ws='\t')
1026 eq(h.encode(), """\
1027from babylon.socal-raves.org (localhost [127.0.0.1]);
1028 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1029 for <mailman-admin@babylon.socal-raves.org>;
1030 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1031\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1032 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1033 for <mailman-admin@babylon.socal-raves.org>;
1034 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1035\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1036 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1037 for <mailman-admin@babylon.socal-raves.org>;
1038 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")
1039
1040 def test_splitting_first_line_only_is_long(self):
1041 eq = self.ndiffAssertEqual
1042 hstr = """\
1043from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
1044\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1045\tid 17k4h5-00034i-00
1046\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
1047 h = Header(hstr, maxlinelen=78, header_name='Received',
1048 continuation_ws='\t')
1049 eq(h.encode(), """\
1050from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
1051 helo=cthulhu.gerg.ca)
1052\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1053\tid 17k4h5-00034i-00
1054\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")
1055
1056 def test_long_8bit_header(self):
1057 eq = self.ndiffAssertEqual
1058 msg = Message()
1059 h = Header('Britische Regierung gibt', 'iso-8859-1',
1060 header_name='Subject')
1061 h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
Guido van Rossum9604e662007-08-30 03:46:43 +00001062 eq(h.encode(maxlinelen=76), """\
1063=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1064 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001065 msg['Subject'] = h
Guido van Rossum9604e662007-08-30 03:46:43 +00001066 eq(msg.as_string(maxheaderlen=76), """\
1067Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1068 =?iso-8859-1?q?hore-Windkraftprojekte?=
1069
1070""")
1071 eq(msg.as_string(maxheaderlen=0), """\
1072Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001073
1074""")
1075
1076 def test_long_8bit_header_no_charset(self):
1077 eq = self.ndiffAssertEqual
1078 msg = Message()
Barry Warsaw8c571042007-08-30 19:17:18 +00001079 header_string = ('Britische Regierung gibt gr\xfcnes Licht '
1080 'f\xfcr Offshore-Windkraftprojekte '
1081 '<a-very-long-address@example.com>')
1082 msg['Reply-To'] = header_string
R David Murray7441a7a2012-03-14 02:59:51 -04001083 eq(msg.as_string(maxheaderlen=78), """\
1084Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1085 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
1086
1087""")
Barry Warsaw8c571042007-08-30 19:17:18 +00001088 msg = Message()
R David Murray7441a7a2012-03-14 02:59:51 -04001089 msg['Reply-To'] = Header(header_string,
Barry Warsaw8c571042007-08-30 19:17:18 +00001090 header_name='Reply-To')
1091 eq(msg.as_string(maxheaderlen=78), """\
1092Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1093 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001094
1095""")
1096
1097 def test_long_to_header(self):
1098 eq = self.ndiffAssertEqual
1099 to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
R David Murray01581ee2011-04-18 10:04:34 -04001100 '<someone@eecs.umich.edu>, '
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001101 '"Someone Test #B" <someone@umich.edu>, '
1102 '"Someone Test #C" <someone@eecs.umich.edu>, '
1103 '"Someone Test #D" <someone@eecs.umich.edu>')
1104 msg = Message()
1105 msg['To'] = to
1106 eq(msg.as_string(maxheaderlen=78), '''\
Guido van Rossum9604e662007-08-30 03:46:43 +00001107To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
Barry Warsaw70d61ce2009-03-30 23:12:30 +00001108 "Someone Test #B" <someone@umich.edu>,
Guido van Rossum9604e662007-08-30 03:46:43 +00001109 "Someone Test #C" <someone@eecs.umich.edu>,
1110 "Someone Test #D" <someone@eecs.umich.edu>
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001111
1112''')
1113
1114 def test_long_line_after_append(self):
1115 eq = self.ndiffAssertEqual
1116 s = 'This is an example of string which has almost the limit of header length.'
1117 h = Header(s)
1118 h.append('Add another line.')
Guido van Rossum9604e662007-08-30 03:46:43 +00001119 eq(h.encode(maxlinelen=76), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001120This is an example of string which has almost the limit of header length.
1121 Add another line.""")
1122
1123 def test_shorter_line_with_append(self):
1124 eq = self.ndiffAssertEqual
1125 s = 'This is a shorter line.'
1126 h = Header(s)
1127 h.append('Add another sentence. (Surprise?)')
1128 eq(h.encode(),
1129 'This is a shorter line. Add another sentence. (Surprise?)')
1130
1131 def test_long_field_name(self):
1132 eq = self.ndiffAssertEqual
1133 fn = 'X-Very-Very-Very-Long-Header-Name'
Guido van Rossum9604e662007-08-30 03:46:43 +00001134 gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
1135 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
1136 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
1137 'bef\xf6rdert. ')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001138 h = Header(gs, 'iso-8859-1', header_name=fn)
1139 # BAW: this seems broken because the first line is too long
Guido van Rossum9604e662007-08-30 03:46:43 +00001140 eq(h.encode(maxlinelen=76), """\
1141=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
1142 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
1143 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
1144 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001145
1146 def test_long_received_header(self):
1147 h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
1148 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
1149 'Wed, 05 Mar 2003 18:10:18 -0700')
1150 msg = Message()
1151 msg['Received-1'] = Header(h, continuation_ws='\t')
1152 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +00001153 # This should be splitting on spaces not semicolons.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001154 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001155Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1156 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
Barry Warsawbef9d212007-08-31 10:55:37 +00001157 Wed, 05 Mar 2003 18:10:18 -0700
R David Murray01581ee2011-04-18 10:04:34 -04001158Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1159 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
Barry Warsawbef9d212007-08-31 10:55:37 +00001160 Wed, 05 Mar 2003 18:10:18 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001161
1162""")
1163
1164 def test_string_headerinst_eq(self):
1165 h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
1166 'tu-muenchen.de> (David Bremner\'s message of '
1167 '"Thu, 6 Mar 2003 13:58:21 +0100")')
1168 msg = Message()
1169 msg['Received-1'] = Header(h, header_name='Received-1',
1170 continuation_ws='\t')
1171 msg['Received-2'] = h
R David Murray01581ee2011-04-18 10:04:34 -04001172 # XXX The space after the ':' should not be there.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001173 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001174Received-1:\x20
1175 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1176 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
1177Received-2:\x20
1178 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1179 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001180
1181""")
1182
1183 def test_long_unbreakable_lines_with_continuation(self):
1184 eq = self.ndiffAssertEqual
1185 msg = Message()
1186 t = """\
1187iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1188 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
1189 msg['Face-1'] = t
1190 msg['Face-2'] = Header(t, header_name='Face-2')
R David Murray01581ee2011-04-18 10:04:34 -04001191 msg['Face-3'] = ' ' + t
Barry Warsawbef9d212007-08-31 10:55:37 +00001192 # XXX This splitting is all wrong. It the first value line should be
R David Murray01581ee2011-04-18 10:04:34 -04001193 # snug against the field name or the space after the header not there.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001194 eq(msg.as_string(maxheaderlen=78), """\
Barry Warsawc5a6a302007-08-31 11:19:21 +00001195Face-1:\x20
Barry Warsaw70d61ce2009-03-30 23:12:30 +00001196 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001197 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Barry Warsawc5a6a302007-08-31 11:19:21 +00001198Face-2:\x20
Barry Warsawbef9d212007-08-31 10:55:37 +00001199 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001200 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
R David Murray01581ee2011-04-18 10:04:34 -04001201Face-3:\x20
1202 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1203 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001204
1205""")
1206
1207 def test_another_long_multiline_header(self):
1208 eq = self.ndiffAssertEqual
1209 m = ('Received: from siimage.com '
1210 '([172.25.1.3]) by zima.siliconimage.com with '
Guido van Rossum9604e662007-08-30 03:46:43 +00001211 'Microsoft SMTPSVC(5.0.2195.4905); '
1212 'Wed, 16 Oct 2002 07:41:11 -0700')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001213 msg = email.message_from_string(m)
1214 eq(msg.as_string(maxheaderlen=78), '''\
R David Murray01581ee2011-04-18 10:04:34 -04001215Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with
1216 Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 2002 07:41:11 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001217
1218''')
1219
1220 def test_long_lines_with_different_header(self):
1221 eq = self.ndiffAssertEqual
1222 h = ('List-Unsubscribe: '
1223 '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
1224 ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
1225 '?subject=unsubscribe>')
1226 msg = Message()
1227 msg['List'] = h
1228 msg['List'] = Header(h, header_name='List')
1229 eq(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001230List: List-Unsubscribe:
1231 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +00001232 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
R David Murray01581ee2011-04-18 10:04:34 -04001233List: List-Unsubscribe:
1234 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +00001235 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001236
1237""")
1238
R. David Murray6f0022d2011-01-07 21:57:25 +00001239 def test_long_rfc2047_header_with_embedded_fws(self):
1240 h = Header(textwrap.dedent("""\
1241 We're going to pretend this header is in a non-ascii character set
1242 \tto see if line wrapping with encoded words and embedded
1243 folding white space works"""),
1244 charset='utf-8',
1245 header_name='Test')
1246 self.assertEqual(h.encode()+'\n', textwrap.dedent("""\
1247 =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
1248 =?utf-8?q?cter_set?=
1249 =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
1250 =?utf-8?q?_folding_white_space_works?=""")+'\n')
1251
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001252
Ezio Melottib3aedd42010-11-20 19:04:17 +00001253
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001254# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    # Checks that the generators' mangle_from_ option escapes (or leaves
    # alone) body lines that begin with "From ".

    def setUp(self):
        # One-header message whose payload opens with a "From " line.
        message = Message()
        message['From'] = 'aaa@bbb.org'
        message.set_payload('From the desk of A.A.A.:\n'
                            'Blah blah blah\n')
        self.msg = message

    def test_mangled_from(self):
        # mangle_from_=True: the body's "From " line gains a '>' prefix,
        # the From: header does not.
        out = StringIO()
        Generator(out, mangle_from_=True).flatten(self.msg)
        expected = ('From: aaa@bbb.org\n'
                    '\n'
                    '>From the desk of A.A.A.:\n'
                    'Blah blah blah\n')
        self.assertEqual(out.getvalue(), expected)

    def test_dont_mangle_from(self):
        # mangle_from_=False: the body comes out exactly as set.
        out = StringIO()
        Generator(out, mangle_from_=False).flatten(self.msg)
        expected = ('From: aaa@bbb.org\n'
                    '\n'
                    'From the desk of A.A.A.:\n'
                    'Blah blah blah\n')
        self.assertEqual(out.getvalue(), expected)

    def test_mangle_from_in_preamble_and_epilog(self):
        # "From " lines before the first boundary and after the closing
        # boundary of a multipart message: exactly two output lines are
        # expected to start with '>From '.
        out = StringIO()
        generator = Generator(out, mangle_from_=True)
        parsed = email.message_from_string(textwrap.dedent("""\
                From: foo@bar.com
                Mime-Version: 1.0
                Content-Type: multipart/mixed; boundary=XXX

                From somewhere unknown

                --XXX
                Content-Type: text/plain

                foo

                --XXX--

                From somewhere unknowable
                """))
        generator.flatten(parsed)
        mangled = sum(line.startswith('>From ')
                      for line in out.getvalue().split('\n'))
        self.assertEqual(mangled, 2)

    def test_mangled_from_with_bad_bytes(self):
        # Bytes path: an 8bit body line that starts with "From " and
        # contains non-ASCII bytes gains the '>' prefix and is otherwise
        # written out unchanged.
        headers = textwrap.dedent("""\
                Content-Type: text/plain; charset="utf-8"
                MIME-Version: 1.0
                Content-Transfer-Encoding: 8bit
                From: aaa@bbb.org

                """).encode('utf-8')
        parsed = email.message_from_bytes(headers + b'From R\xc3\xb6lli\n')
        out = BytesIO()
        BytesGenerator(out, mangle_from_=True).flatten(parsed)
        self.assertEqual(out.getvalue(), headers + b'>From R\xc3\xb6lli\n')
1322
Ezio Melottib3aedd42010-11-20 19:04:17 +00001323
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001324# Test the basic MIMEAudio class
1325class TestMIMEAudio(unittest.TestCase):
1326 def setUp(self):
R David Murray28346b82011-03-31 11:40:20 -04001327 with openfile('audiotest.au', 'rb') as fp:
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001328 self._audiodata = fp.read()
1329 self._au = MIMEAudio(self._audiodata)
1330
1331 def test_guess_minor_type(self):
1332 self.assertEqual(self._au.get_content_type(), 'audio/basic')
1333
1334 def test_encoding(self):
1335 payload = self._au.get_payload()
R. David Murray7da8f062010-06-04 16:11:08 +00001336 self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
1337 self._audiodata)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001338
1339 def test_checkSetMinor(self):
1340 au = MIMEAudio(self._audiodata, 'fish')
1341 self.assertEqual(au.get_content_type(), 'audio/fish')
1342
1343 def test_add_header(self):
1344 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001345 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001346 self._au.add_header('Content-Disposition', 'attachment',
1347 filename='audiotest.au')
1348 eq(self._au['content-disposition'],
1349 'attachment; filename="audiotest.au"')
1350 eq(self._au.get_params(header='content-disposition'),
1351 [('attachment', ''), ('filename', 'audiotest.au')])
1352 eq(self._au.get_param('filename', header='content-disposition'),
1353 'audiotest.au')
1354 missing = []
1355 eq(self._au.get_param('attachment', header='content-disposition'), '')
1356 unless(self._au.get_param('foo', failobj=missing,
1357 header='content-disposition') is missing)
1358 # Try some missing stuff
1359 unless(self._au.get_param('foobar', missing) is missing)
1360 unless(self._au.get_param('attachment', missing,
1361 header='foobar') is missing)
1362
1363
Ezio Melottib3aedd42010-11-20 19:04:17 +00001364
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001365# Test the basic MIMEImage class
1366class TestMIMEImage(unittest.TestCase):
1367 def setUp(self):
1368 with openfile('PyBanner048.gif', 'rb') as fp:
1369 self._imgdata = fp.read()
1370 self._im = MIMEImage(self._imgdata)
1371
1372 def test_guess_minor_type(self):
1373 self.assertEqual(self._im.get_content_type(), 'image/gif')
1374
1375 def test_encoding(self):
1376 payload = self._im.get_payload()
R. David Murray7da8f062010-06-04 16:11:08 +00001377 self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
1378 self._imgdata)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001379
1380 def test_checkSetMinor(self):
1381 im = MIMEImage(self._imgdata, 'fish')
1382 self.assertEqual(im.get_content_type(), 'image/fish')
1383
1384 def test_add_header(self):
1385 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001386 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001387 self._im.add_header('Content-Disposition', 'attachment',
1388 filename='dingusfish.gif')
1389 eq(self._im['content-disposition'],
1390 'attachment; filename="dingusfish.gif"')
1391 eq(self._im.get_params(header='content-disposition'),
1392 [('attachment', ''), ('filename', 'dingusfish.gif')])
1393 eq(self._im.get_param('filename', header='content-disposition'),
1394 'dingusfish.gif')
1395 missing = []
1396 eq(self._im.get_param('attachment', header='content-disposition'), '')
1397 unless(self._im.get_param('foo', failobj=missing,
1398 header='content-disposition') is missing)
1399 # Try some missing stuff
1400 unless(self._im.get_param('foobar', missing) is missing)
1401 unless(self._im.get_param('attachment', missing,
1402 header='foobar') is missing)
1403
1404
Ezio Melottib3aedd42010-11-20 19:04:17 +00001405
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001406# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    # Checks the headers and the payload of a MIMEApplication built from
    # six high bytes.

    def test_headers(self):
        part = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        self.assertEqual(part.get_content_type(), 'application/octet-stream')
        self.assertEqual(part['content-transfer-encoding'], 'base64')

    def test_body(self):
        raw = b'\xfa\xfb\xfc\xfd\xfe\xff'
        part = MIMEApplication(raw)
        # whitespace in the cte encoded block is RFC-irrelevant.
        encoded = part.get_payload().strip()
        self.assertEqual(encoded, '+vv8/f7/')
        self.assertEqual(part.get_payload(decode=True), raw)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001421
1422
Ezio Melottib3aedd42010-11-20 19:04:17 +00001423
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001424# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    """Checks on MIMEText construction, charset handling and payload."""

    def setUp(self):
        self._msg = MIMEText('hello there')

    def test_types(self):
        eq = self.assertEqual
        eq(self._msg.get_content_type(), 'text/plain')
        eq(self._msg.get_param('charset'), 'us-ascii')
        # A fresh list is a unique sentinel: getting this exact object back
        # proves that failobj was returned rather than a real value.
        missing = []
        self.assertIs(self._msg.get_param('foobar', missing), missing)
        self.assertIs(
            self._msg.get_param('charset', missing, header='foobar'),
            missing)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there')
        # The charset object is compared directly against its name here.
        eq(msg.get_charset(), 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        self.assertIn('hello there', msg.as_string())

    def test_utf8_input(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        eq = self.assertEqual
        msg = MIMEText(teststr, _charset='utf-8')
        eq(msg.get_charset().output_charset, 'utf-8')
        eq(msg['content-type'], 'text/plain; charset="utf-8"')
        eq(msg.get_payload(decode=True), teststr.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
        "will fix in email6")
    def test_utf8_input_no_charset(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
1475
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001476
Ezio Melottib3aedd42010-11-20 19:04:17 +00001477
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001478# Test complicated multipart/* messages
class TestMultipart(TestEmailBase):
    """Generation and parsing of multipart/* messages."""

    def setUp(self):
        # Build a two-part multipart/mixed message (text intro + GIF image)
        # that the hierarchy test inspects.
        with openfile('PyBanner048.gif', 'rb') as fp:
            data = fp.read()
        container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
        image = MIMEImage(data, name='dingusfish.gif')
        image.add_header('content-disposition', 'attachment',
                         filename='dingusfish.gif')
        intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
        container.attach(intro)
        container.attach(image)
        container['From'] = 'Barry <barry@digicool.com>'
        container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
        container['Subject'] = 'Here is your dingus fish'

        now = 987809702.54848599
        timetuple = time.localtime(now)
        if timetuple.tm_isdst == 0:
            tzsecs = time.timezone
        else:
            tzsecs = time.altzone
        # time.timezone/time.altzone count seconds *west* of UTC, so a
        # positive value means a negative RFC 2822 offset.
        sign = '-' if tzsecs > 0 else '+'
        # Format the magnitude as HHMM.  Dividing the signed value by 36
        # leaked a second sign into the header east of UTC and was wrong
        # for zones that are not a whole number of hours from UTC.
        hours, minutes = divmod(abs(tzsecs) // 60, 60)
        tzoffset = ' %s%02d%02d' % (sign, hours, minutes)
        container['Date'] = time.strftime(
            '%a, %d %b %Y %H:%M:%S', timetuple) + tzoffset
        self._msg = container
        self._im = image
        self._txt = intro

    def test_hierarchy(self):
        # convenience
        eq = self.assertEqual
        raises = self.assertRaises
        # tests
        m = self._msg
        self.assertTrue(m.is_multipart())
        eq(m.get_content_type(), 'multipart/mixed')
        eq(len(m.get_payload()), 2)
        raises(IndexError, m.get_payload, 2)
        m0 = m.get_payload(0)
        m1 = m.get_payload(1)
        # The container must hand back the very objects that were attached.
        self.assertIs(m0, self._txt)
        self.assertIs(m1, self._im)
        eq(m.get_payload(), [m0, m1])
        self.assertFalse(m0.is_multipart())
        self.assertFalse(m1.is_multipart())

    def test_empty_multipart_idempotent(self):
        text = """\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY


--BOUNDARY--
"""
        msg = Parser().parsestr(text)
        self.ndiffAssertEqual(text, msg.as_string())

    def test_no_parts_in_a_multipart_with_none_epilogue(self):
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY

--BOUNDARY--''')

    def test_no_parts_in_a_multipart_with_empty_epilogue(self):
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = ''
        outer.epilogue = ''
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY

--BOUNDARY--
''')

    def test_one_part_in_a_multipart(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.set_boundary('BOUNDARY')
        msg = MIMEText('hello world')
        outer.attach(msg)
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_preamble(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')


    def test_seq_parts_in_a_multipart_with_none_preamble(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')


    def test_seq_parts_in_a_multipart_with_none_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')


    def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')


    def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = '\n'
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--

''')

    def test_message_external_body(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_36.txt')
        eq(len(msg.get_payload()), 2)
        msg1 = msg.get_payload(1)
        eq(msg1.get_content_type(), 'multipart/alternative')
        eq(len(msg1.get_payload()), 2)
        for subpart in msg1.get_payload():
            eq(subpart.get_content_type(), 'message/external-body')
            eq(len(subpart.get_payload()), 1)
            subsubpart = subpart.get_payload(0)
            eq(subsubpart.get_content_type(), 'text/plain')

    def test_double_boundary(self):
        # msg_37.txt is a multipart that contains two dash-boundary's in a
        # row.  Our interpretation of RFC 2046 calls for ignoring the second
        # and subsequent boundaries.
        msg = self._msgobj('msg_37.txt')
        self.assertEqual(len(msg.get_payload()), 3)

    def test_nested_inner_contains_outer_boundary(self):
        eq = self.ndiffAssertEqual
        # msg_38.txt has an inner part that contains outer boundaries.  My
        # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
        # these are illegal and should be interpreted as unterminated inner
        # parts.
        msg = self._msgobj('msg_38.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
            text/plain
        text/plain
    text/plain
    text/plain
""")

    def test_nested_with_same_boundary(self):
        eq = self.ndiffAssertEqual
        # msg 39.txt is similarly evil in that it's got inner parts that use
        # the same boundary as outer parts.  Again, I believe the way this is
        # parsed is closest to the spirit of RFC 2046
        msg = self._msgobj('msg_39.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
        application/octet-stream
        application/octet-stream
    text/plain
""")

    def test_boundary_in_non_multipart(self):
        msg = self._msgobj('msg_40.txt')
        self.assertEqual(msg.as_string(), '''\
MIME-Version: 1.0
Content-Type: text/html; boundary="--961284236552522269"

----961284236552522269
Content-Type: text/html;
Content-Transfer-Encoding: 7Bit

<html></html>

----961284236552522269--
''')

    def test_boundary_with_leading_space(self):
        eq = self.assertEqual
        msg = email.message_from_string('''\
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary=" XXXX"

-- XXXX
Content-Type: text/plain


-- XXXX
Content-Type: text/plain

-- XXXX--
''')
        self.assertTrue(msg.is_multipart())
        eq(msg.get_boundary(), ' XXXX')
        eq(len(msg.get_payload()), 2)

    def test_boundary_without_trailing_newline(self):
        m = Parser().parsestr("""\
Content-Type: multipart/mixed; boundary="===============0012394164=="
MIME-Version: 1.0

--===============0012394164==
Content-Type: image/file1.jpg
MIME-Version: 1.0
Content-Transfer-Encoding: base64

YXNkZg==
--===============0012394164==--""")
        self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001850
1851
Ezio Melottib3aedd42010-11-20 19:04:17 +00001852
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001853# Test some badly formatted messages
class TestNonConformant(TestEmailBase):
    """Parsing of badly formatted messages and the defects they record."""

    def test_parse_missing_minor_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_14.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')

    # test_defect_handling
    def test_same_boundary_inner_outer(self):
        msg = self._msgobj('msg_15.txt')
        # XXX We can probably eventually do better
        inner = msg.get_payload(0)
        self.assertTrue(hasattr(inner, 'defects'))
        self.assertEqual(len(inner.defects), 1)
        self.assertIsInstance(inner.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    # test_defect_handling
    def test_multipart_no_boundary(self):
        msg = self._msgobj('msg_25.txt')
        # Without a boundary the body cannot be split, so it stays a string.
        self.assertIsInstance(msg.get_payload(), str)
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    # Template for the Content-Transfer-Encoding tests below; the {} slot
    # receives an optional extra header line (or the empty string).
    multipart_msg = textwrap.dedent("""\
        Date: Wed, 14 Nov 2007 12:56:23 GMT
        From: foo@bar.invalid
        To: foo@bar.invalid
        Subject: Content-Transfer-Encoding: base64 and multipart
        MIME-Version: 1.0
        Content-Type: multipart/mixed;
            boundary="===============3344438784458119861=="{}

        --===============3344438784458119861==
        Content-Type: text/plain

        Test message

        --===============3344438784458119861==
        Content-Type: application/octet-stream
        Content-Transfer-Encoding: base64

        YWJj

        --===============3344438784458119861==--
        """)

    # test_defect_handling
    def test_multipart_invalid_cte(self):
        msg = self._str_msg(
            self.multipart_msg.format("\nContent-Transfer-Encoding: base64"))
        self.assertEqual(len(msg.defects), 1)
        self.assertIsInstance(msg.defects[0],
            errors.InvalidMultipartContentTransferEncodingDefect)

    # test_defect_handling
    def test_multipart_no_cte_no_defect(self):
        msg = self._str_msg(self.multipart_msg.format(''))
        self.assertEqual(len(msg.defects), 0)

    # test_defect_handling
    def test_multipart_valid_cte_no_defect(self):
        for cte in ('7bit', '8bit', 'BINary'):
            msg = self._str_msg(
                self.multipart_msg.format(
                    "\nContent-Transfer-Encoding: {}".format(cte)))
            self.assertEqual(len(msg.defects), 0)

    # test_headerregistry.TestContentTypeHeader invalid_1 and invalid_2.
    def test_invalid_content_type(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        msg = Message()
        # RFC 2045, $5.2 says invalid yields text/plain
        msg['Content-Type'] = 'text'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Clear the old value and try something /really/ invalid
        del msg['content-type']
        msg['Content-Type'] = 'foo'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Still, make sure that the message is idempotently generated
        s = StringIO()
        g = Generator(s)
        g.flatten(msg)
        neq(s.getvalue(), 'Content-Type: foo\n\n')

    def test_no_start_boundary(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_31.txt')
        eq(msg.get_payload(), """\
--BOUNDARY
Content-Type: text/plain

message 1

--BOUNDARY
Content-Type: text/plain

message 2

--BOUNDARY--
""")

    def test_no_separating_blank_line(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_35.txt')
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: here's something interesting

counter to RFC 2822, there's no separating newline here
""")

    # test_defect_handling
    def test_lying_multipart(self):
        msg = self._msgobj('msg_41.txt')
        self.assertTrue(hasattr(msg, 'defects'))
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    # test_defect_handling
    def test_missing_start_boundary(self):
        outer = self._msgobj('msg_42.txt')
        # The message structure is:
        #
        # multipart/mixed
        #    text/plain
        #    message/rfc822
        #        multipart/mixed [*]
        #
        # [*] This message is missing its start boundary
        bad = outer.get_payload(1).get_payload(0)
        self.assertEqual(len(bad.defects), 1)
        self.assertIsInstance(bad.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    # test_defect_handling
    def test_first_line_is_continuation_header(self):
        eq = self.assertEqual
        m = ' Line 1\nSubject: test\n\nbody'
        msg = email.message_from_string(m)
        eq(msg.keys(), ['Subject'])
        eq(msg.get_payload(), 'body')
        eq(len(msg.defects), 1)
        self.assertDefectsEqual(msg.defects,
                                 [errors.FirstHeaderLineIsContinuationDefect])
        eq(msg.defects[0].line, ' Line 1\n')

    # test_defect_handling
    def test_missing_header_body_separator(self):
        # Our heuristic if we see a line that doesn't look like a header (no
        # leading whitespace but no ':') is to assume that the blank line that
        # separates the header from the body is missing, and to stop parsing
        # headers and start parsing the body.
        msg = self._str_msg('Subject: test\nnot a header\nTo: abc\n\nb\n')
        self.assertEqual(msg.keys(), ['Subject'])
        self.assertEqual(msg.get_payload(), 'not a header\nTo: abc\n\nb\n')
        self.assertDefectsEqual(msg.defects,
                                [errors.MissingHeaderBodySeparatorDefect])
2029
Ezio Melottib3aedd42010-11-20 19:04:17 +00002030
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002031# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    """RFC 2047 encoded-word decoding via decode_header()/make_header()."""

    def test_rfc2047_multiline(self):
        eq = self.assertEqual
        s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
        dh = decode_header(s)
        eq(dh, [
            (b'Re: ', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            (b' baz foo bar ', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
        header = make_header(dh)
        eq(str(header),
           'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""")

    def test_whitespace_keeper_unicode(self):
        eq = self.assertEqual
        s = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        dh = decode_header(s)
        eq(dh, [(b'Andr\xe9', 'iso-8859-1'),
                (b' Pirard <pirard@dom.ain>', None)])
        header = str(make_header(dh))
        eq(header, 'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_keeper_unicode_2(self):
        eq = self.assertEqual
        s = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?='
        dh = decode_header(s)
        eq(dh, [(b'The ', None), (b'quick brown fox', 'iso-8859-1'),
                (b' jumped over the ', None), (b'lazy dog', 'iso-8859-1')])
        hu = str(make_header(dh))
        eq(hu, 'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        dh = decode_header(s)
        self.assertEqual(dh, [(b'Sm', None), (b'\xf6', 'iso-8859-1'),
                              (b'rg', None), (b'\xe5', 'iso-8859-1'),
                              (b'sbord', None)])

    def test_rfc2047_with_whitespace(self):
        s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        dh = decode_header(s)
        self.assertEqual(dh, [(b'Sm ', None), (b'\xf6', 'iso-8859-1'),
                              (b' rg ', None), (b'\xe5', 'iso-8859-1'),
                              (b' sbord', None)])

    def test_rfc2047_B_bad_padding(self):
        s = '=?iso-8859-1?B?%s?='
        data = [                                # only test complete bytes
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi')
          ]
        for q, a in data:
            dh = decode_header(s % q)
            self.assertEqual(dh, [(a, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004.
        s = '=?iso-8659-1?Q?andr=e9=zz?='
        self.assertEqual(decode_header(s),
                        [(b'andr\xe9=zz', 'iso-8659-1')])

    def test_rfc2047_rfc2047_1(self):
        # 1st testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_2(self):
        # 2nd testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?= b)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a', 'iso-8859-1'), (b' b)', None)])

    def test_rfc2047_rfc2047_3(self):
        # 3rd testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_4(self):
        # 4th testcase at end of rfc2047: the encoded words are separated
        # by TWO spaces, which must be dropped just like a single one.
        s = '(=?ISO-8859-1?Q?a?=  =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_5a(self):
        # 5th testcase at end of rfc2047 newline is \r\n
        s = '(=?ISO-8859-1?Q?a?=\r\n =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_5b(self):
        # 5th testcase at end of rfc2047 newline is \n
        s = '(=?ISO-8859-1?Q?a?=\n =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_6(self):
        # 6th testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a_b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a b', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_7(self):
        # 7th testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a', 'iso-8859-1'), (b' b', 'iso-8859-2'),
             (b')', None)])
        self.assertEqual(make_header(decode_header(s)).encode(), s.lower())
        self.assertEqual(str(make_header(decode_header(s))), '(a b)')

    def test_multiline_header(self):
        s = '=?windows-1252?q?=22M=FCller_T=22?=\r\n <T.Mueller@xxx.com>'
        self.assertEqual(decode_header(s),
            [(b'"M\xfcller T"', 'windows-1252'),
             (b'<T.Mueller@xxx.com>', None)])
        self.assertEqual(make_header(decode_header(s)).encode(),
                         ''.join(s.splitlines()))
        self.assertEqual(str(make_header(decode_header(s))),
                         '"Müller T" <T.Mueller@xxx.com>')
2158
Ezio Melottib3aedd42010-11-20 19:04:17 +00002159
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002160# Test the MIMEMessage class
2161class TestMIMEMessage(TestEmailBase):
2162 def setUp(self):
2163 with openfile('msg_11.txt') as fp:
2164 self._text = fp.read()
2165
2166 def test_type_error(self):
2167 self.assertRaises(TypeError, MIMEMessage, 'a plain string')
2168
2169 def test_valid_argument(self):
2170 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002171 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002172 subject = 'A sub-message'
2173 m = Message()
2174 m['Subject'] = subject
2175 r = MIMEMessage(m)
2176 eq(r.get_content_type(), 'message/rfc822')
2177 payload = r.get_payload()
2178 unless(isinstance(payload, list))
2179 eq(len(payload), 1)
2180 subpart = payload[0]
2181 unless(subpart is m)
2182 eq(subpart['subject'], subject)
2183
2184 def test_bad_multipart(self):
2185 eq = self.assertEqual
2186 msg1 = Message()
2187 msg1['Subject'] = 'subpart 1'
2188 msg2 = Message()
2189 msg2['Subject'] = 'subpart 2'
2190 r = MIMEMessage(msg1)
2191 self.assertRaises(errors.MultipartConversionError, r.attach, msg2)
2192
2193 def test_generate(self):
2194 # First craft the message to be encapsulated
2195 m = Message()
2196 m['Subject'] = 'An enclosed message'
2197 m.set_payload('Here is the body of the message.\n')
2198 r = MIMEMessage(m)
2199 r['Subject'] = 'The enclosing message'
2200 s = StringIO()
2201 g = Generator(s)
2202 g.flatten(r)
2203 self.assertEqual(s.getvalue(), """\
2204Content-Type: message/rfc822
2205MIME-Version: 1.0
2206Subject: The enclosing message
2207
2208Subject: An enclosed message
2209
2210Here is the body of the message.
2211""")
2212
2213 def test_parse_message_rfc822(self):
2214 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002215 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002216 msg = self._msgobj('msg_11.txt')
2217 eq(msg.get_content_type(), 'message/rfc822')
2218 payload = msg.get_payload()
2219 unless(isinstance(payload, list))
2220 eq(len(payload), 1)
2221 submsg = payload[0]
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002222 self.assertTrue(isinstance(submsg, Message))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002223 eq(submsg['subject'], 'An enclosed message')
2224 eq(submsg.get_payload(), 'Here is the body of the message.\n')
2225
2226 def test_dsn(self):
2227 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002228 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002229 # msg 16 is a Delivery Status Notification, see RFC 1894
2230 msg = self._msgobj('msg_16.txt')
2231 eq(msg.get_content_type(), 'multipart/report')
2232 unless(msg.is_multipart())
2233 eq(len(msg.get_payload()), 3)
2234 # Subpart 1 is a text/plain, human readable section
2235 subpart = msg.get_payload(0)
2236 eq(subpart.get_content_type(), 'text/plain')
2237 eq(subpart.get_payload(), """\
2238This report relates to a message you sent with the following header fields:
2239
2240 Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
2241 Date: Sun, 23 Sep 2001 20:10:55 -0700
2242 From: "Ian T. Henry" <henryi@oxy.edu>
2243 To: SoCal Raves <scr@socal-raves.org>
2244 Subject: [scr] yeah for Ians!!
2245
2246Your message cannot be delivered to the following recipients:
2247
2248 Recipient address: jangel1@cougar.noc.ucla.edu
2249 Reason: recipient reached disk quota
2250
2251""")
2252 # Subpart 2 contains the machine parsable DSN information. It
2253 # consists of two blocks of headers, represented by two nested Message
2254 # objects.
2255 subpart = msg.get_payload(1)
2256 eq(subpart.get_content_type(), 'message/delivery-status')
2257 eq(len(subpart.get_payload()), 2)
2258 # message/delivery-status should treat each block as a bunch of
2259 # headers, i.e. a bunch of Message objects.
2260 dsn1 = subpart.get_payload(0)
2261 unless(isinstance(dsn1, Message))
2262 eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
2263 eq(dsn1.get_param('dns', header='reporting-mta'), '')
2264 # Try a missing one <wink>
2265 eq(dsn1.get_param('nsd', header='reporting-mta'), None)
2266 dsn2 = subpart.get_payload(1)
2267 unless(isinstance(dsn2, Message))
2268 eq(dsn2['action'], 'failed')
2269 eq(dsn2.get_params(header='original-recipient'),
2270 [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
2271 eq(dsn2.get_param('rfc822', header='final-recipient'), '')
2272 # Subpart 3 is the original message
2273 subpart = msg.get_payload(2)
2274 eq(subpart.get_content_type(), 'message/rfc822')
2275 payload = subpart.get_payload()
2276 unless(isinstance(payload, list))
2277 eq(len(payload), 1)
2278 subsubpart = payload[0]
2279 unless(isinstance(subsubpart, Message))
2280 eq(subsubpart.get_content_type(), 'text/plain')
2281 eq(subsubpart['message-id'],
2282 '<002001c144a6$8752e060$56104586@oxy.edu>')
2283
2284 def test_epilogue(self):
2285 eq = self.ndiffAssertEqual
2286 with openfile('msg_21.txt') as fp:
2287 text = fp.read()
2288 msg = Message()
2289 msg['From'] = 'aperson@dom.ain'
2290 msg['To'] = 'bperson@dom.ain'
2291 msg['Subject'] = 'Test'
2292 msg.preamble = 'MIME message'
2293 msg.epilogue = 'End of MIME message\n'
2294 msg1 = MIMEText('One')
2295 msg2 = MIMEText('Two')
2296 msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
2297 msg.attach(msg1)
2298 msg.attach(msg2)
2299 sfp = StringIO()
2300 g = Generator(sfp)
2301 g.flatten(msg)
2302 eq(sfp.getvalue(), text)
2303
2304 def test_no_nl_preamble(self):
2305 eq = self.ndiffAssertEqual
2306 msg = Message()
2307 msg['From'] = 'aperson@dom.ain'
2308 msg['To'] = 'bperson@dom.ain'
2309 msg['Subject'] = 'Test'
2310 msg.preamble = 'MIME message'
2311 msg.epilogue = ''
2312 msg1 = MIMEText('One')
2313 msg2 = MIMEText('Two')
2314 msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
2315 msg.attach(msg1)
2316 msg.attach(msg2)
2317 eq(msg.as_string(), """\
2318From: aperson@dom.ain
2319To: bperson@dom.ain
2320Subject: Test
2321Content-Type: multipart/mixed; boundary="BOUNDARY"
2322
2323MIME message
2324--BOUNDARY
2325Content-Type: text/plain; charset="us-ascii"
2326MIME-Version: 1.0
2327Content-Transfer-Encoding: 7bit
2328
2329One
2330--BOUNDARY
2331Content-Type: text/plain; charset="us-ascii"
2332MIME-Version: 1.0
2333Content-Transfer-Encoding: 7bit
2334
2335Two
2336--BOUNDARY--
2337""")
2338
2339 def test_default_type(self):
2340 eq = self.assertEqual
2341 with openfile('msg_30.txt') as fp:
2342 msg = email.message_from_file(fp)
2343 container1 = msg.get_payload(0)
2344 eq(container1.get_default_type(), 'message/rfc822')
2345 eq(container1.get_content_type(), 'message/rfc822')
2346 container2 = msg.get_payload(1)
2347 eq(container2.get_default_type(), 'message/rfc822')
2348 eq(container2.get_content_type(), 'message/rfc822')
2349 container1a = container1.get_payload(0)
2350 eq(container1a.get_default_type(), 'text/plain')
2351 eq(container1a.get_content_type(), 'text/plain')
2352 container2a = container2.get_payload(0)
2353 eq(container2a.get_default_type(), 'text/plain')
2354 eq(container2a.get_content_type(), 'text/plain')
2355
2356 def test_default_type_with_explicit_container_type(self):
2357 eq = self.assertEqual
2358 with openfile('msg_28.txt') as fp:
2359 msg = email.message_from_file(fp)
2360 container1 = msg.get_payload(0)
2361 eq(container1.get_default_type(), 'message/rfc822')
2362 eq(container1.get_content_type(), 'message/rfc822')
2363 container2 = msg.get_payload(1)
2364 eq(container2.get_default_type(), 'message/rfc822')
2365 eq(container2.get_content_type(), 'message/rfc822')
2366 container1a = container1.get_payload(0)
2367 eq(container1a.get_default_type(), 'text/plain')
2368 eq(container1a.get_content_type(), 'text/plain')
2369 container2a = container2.get_payload(0)
2370 eq(container2a.get_default_type(), 'text/plain')
2371 eq(container2a.get_content_type(), 'text/plain')
2372
2373 def test_default_type_non_parsed(self):
2374 eq = self.assertEqual
2375 neq = self.ndiffAssertEqual
2376 # Set up container
2377 container = MIMEMultipart('digest', 'BOUNDARY')
2378 container.epilogue = ''
2379 # Set up subparts
2380 subpart1a = MIMEText('message 1\n')
2381 subpart2a = MIMEText('message 2\n')
2382 subpart1 = MIMEMessage(subpart1a)
2383 subpart2 = MIMEMessage(subpart2a)
2384 container.attach(subpart1)
2385 container.attach(subpart2)
2386 eq(subpart1.get_content_type(), 'message/rfc822')
2387 eq(subpart1.get_default_type(), 'message/rfc822')
2388 eq(subpart2.get_content_type(), 'message/rfc822')
2389 eq(subpart2.get_default_type(), 'message/rfc822')
2390 neq(container.as_string(0), '''\
2391Content-Type: multipart/digest; boundary="BOUNDARY"
2392MIME-Version: 1.0
2393
2394--BOUNDARY
2395Content-Type: message/rfc822
2396MIME-Version: 1.0
2397
2398Content-Type: text/plain; charset="us-ascii"
2399MIME-Version: 1.0
2400Content-Transfer-Encoding: 7bit
2401
2402message 1
2403
2404--BOUNDARY
2405Content-Type: message/rfc822
2406MIME-Version: 1.0
2407
2408Content-Type: text/plain; charset="us-ascii"
2409MIME-Version: 1.0
2410Content-Transfer-Encoding: 7bit
2411
2412message 2
2413
2414--BOUNDARY--
2415''')
2416 del subpart1['content-type']
2417 del subpart1['mime-version']
2418 del subpart2['content-type']
2419 del subpart2['mime-version']
2420 eq(subpart1.get_content_type(), 'message/rfc822')
2421 eq(subpart1.get_default_type(), 'message/rfc822')
2422 eq(subpart2.get_content_type(), 'message/rfc822')
2423 eq(subpart2.get_default_type(), 'message/rfc822')
2424 neq(container.as_string(0), '''\
2425Content-Type: multipart/digest; boundary="BOUNDARY"
2426MIME-Version: 1.0
2427
2428--BOUNDARY
2429
2430Content-Type: text/plain; charset="us-ascii"
2431MIME-Version: 1.0
2432Content-Transfer-Encoding: 7bit
2433
2434message 1
2435
2436--BOUNDARY
2437
2438Content-Type: text/plain; charset="us-ascii"
2439MIME-Version: 1.0
2440Content-Transfer-Encoding: 7bit
2441
2442message 2
2443
2444--BOUNDARY--
2445''')
2446
2447 def test_mime_attachments_in_constructor(self):
2448 eq = self.assertEqual
2449 text1 = MIMEText('')
2450 text2 = MIMEText('')
2451 msg = MIMEMultipart(_subparts=(text1, text2))
2452 eq(len(msg.get_payload()), 2)
2453 eq(msg.get_payload(0), text1)
2454 eq(msg.get_payload(1), text2)
2455
Christian Heimes587c2bf2008-01-19 16:21:02 +00002456 def test_default_multipart_constructor(self):
2457 msg = MIMEMultipart()
2458 self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002459
Ezio Melottib3aedd42010-11-20 19:04:17 +00002460
# A general test of parser->model->generator idempotency.  In other words,
# read a message in, parse it into a message object tree, then, without
# touching the tree, regenerate the plain text.  The original text and the
# transformed text should be identical.  Note that we ignore the Unix-From
# since that may contain a changed date.
class TestIdempotent(TestEmailBase):
    # Each test parses a fixture file into a message tree and checks that
    # flattening the untouched tree reproduces the original text exactly.

    # Line separator expected in parsed preambles, epilogues and payloads.
    linesep = '\n'

    def _msgobj(self, filename):
        # Return (parsed message, raw text) for the named fixture file.
        with openfile(filename) as fp:
            data = fp.read()
        msg = email.message_from_string(data)
        return msg, data

    def _idempotent(self, msg, text, unixfrom=False):
        # Flatten msg without header wrapping (maxheaderlen=0) and compare
        # the output with the original text.
        eq = self.ndiffAssertEqual
        s = StringIO()
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg, unixfrom=unixfrom)
        eq(text, s.getvalue())

    def _assert_roundtrip(self, filename, **flatten_args):
        # Parse the fixture and run the idempotency check on it; extra
        # keyword arguments are forwarded to _idempotent().
        msg, text = self._msgobj(filename)
        self._idempotent(msg, text, **flatten_args)

    def test_parse_text_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_01.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_params()[1], ('charset', 'us-ascii'))
        eq(msg.get_param('charset'), 'us-ascii')
        self.assertIsNone(msg.preamble)
        self.assertIsNone(msg.epilogue)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_03.txt')
        eq(msg.get_content_type(), 'text/plain')
        self.assertIsNone(msg.get_params())
        self.assertIsNone(msg.get_param('charset'))
        self._idempotent(msg, text)

    def test_simple_multipart(self):
        self._assert_roundtrip('msg_04.txt')

    def test_MIME_digest(self):
        self._assert_roundtrip('msg_02.txt')

    def test_long_header(self):
        self._assert_roundtrip('msg_27.txt')

    def test_MIME_digest_with_part_headers(self):
        self._assert_roundtrip('msg_28.txt')

    def test_mixed_with_image(self):
        self._assert_roundtrip('msg_06.txt')

    def test_multipart_report(self):
        self._assert_roundtrip('msg_05.txt')

    def test_dsn(self):
        self._assert_roundtrip('msg_16.txt')

    def test_preamble_epilogue(self):
        self._assert_roundtrip('msg_21.txt')

    def test_multipart_one_part(self):
        self._assert_roundtrip('msg_23.txt')

    def test_multipart_no_parts(self):
        self._assert_roundtrip('msg_24.txt')

    def test_no_start_boundary(self):
        self._assert_roundtrip('msg_31.txt')

    def test_rfc2231_charset(self):
        self._assert_roundtrip('msg_32.txt')

    def test_more_rfc2231_parameters(self):
        self._assert_roundtrip('msg_33.txt')

    def test_text_plain_in_a_multipart_digest(self):
        self._assert_roundtrip('msg_34.txt')

    def test_nested_multipart_mixeds(self):
        self._assert_roundtrip('msg_12a.txt')

    def test_message_external_body_idempotent(self):
        self._assert_roundtrip('msg_36.txt')

    def test_message_delivery_status(self):
        # This fixture is flattened with its Unix-From line included.
        self._assert_roundtrip('msg_43.txt', unixfrom=True)

    def test_message_signed_idempotent(self):
        self._assert_roundtrip('msg_45.txt')

    def test_content_type(self):
        eq = self.assertEqual
        # Only the parsed message is needed here; the raw text is unused.
        msg, _text = self._msgobj('msg_05.txt')
        eq(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(msg.get_params())
        eq(params['report-type'], 'delivery-status')
        eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep)
        eq(msg.epilogue, self.linesep)
        eq(len(msg.get_payload()), 3)
        # Make sure the subparts are what we expect
        msg1 = msg.get_payload(0)
        eq(msg1.get_content_type(), 'text/plain')
        eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg2 = msg.get_payload(1)
        eq(msg2.get_content_type(), 'text/plain')
        eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg3 = msg.get_payload(2)
        eq(msg3.get_content_type(), 'message/rfc822')
        self.assertIsInstance(msg3, Message)
        payload = msg3.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg4 = payload[0]
        self.assertIsInstance(msg4, Message)
        eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep)

    def test_parser(self):
        eq = self.assertEqual
        msg, _text = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        eq(msg.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and that
        # that submessage has a type of text/plain
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg1 = payload[0]
        self.assertIsInstance(msg1, Message)
        eq(msg1.get_content_type(), 'text/plain')
        self.assertIsInstance(msg1.get_payload(), str)
        eq(msg1.get_payload(), self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002623
2624
Ezio Melottib3aedd42010-11-20 19:04:17 +00002625
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002626# Test various other bits of the package's functionality
2627class TestMiscellaneous(TestEmailBase):
2628 def test_message_from_string(self):
2629 with openfile('msg_01.txt') as fp:
2630 text = fp.read()
2631 msg = email.message_from_string(text)
2632 s = StringIO()
2633 # Don't wrap/continue long headers since we're trying to test
2634 # idempotency.
2635 g = Generator(s, maxheaderlen=0)
2636 g.flatten(msg)
2637 self.assertEqual(text, s.getvalue())
2638
2639 def test_message_from_file(self):
2640 with openfile('msg_01.txt') as fp:
2641 text = fp.read()
2642 fp.seek(0)
2643 msg = email.message_from_file(fp)
2644 s = StringIO()
2645 # Don't wrap/continue long headers since we're trying to test
2646 # idempotency.
2647 g = Generator(s, maxheaderlen=0)
2648 g.flatten(msg)
2649 self.assertEqual(text, s.getvalue())
2650
2651 def test_message_from_string_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002652 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002653 with openfile('msg_01.txt') as fp:
2654 text = fp.read()
2655
2656 # Create a subclass
2657 class MyMessage(Message):
2658 pass
2659
2660 msg = email.message_from_string(text, MyMessage)
2661 unless(isinstance(msg, MyMessage))
2662 # Try something more complicated
2663 with openfile('msg_02.txt') as fp:
2664 text = fp.read()
2665 msg = email.message_from_string(text, MyMessage)
2666 for subpart in msg.walk():
2667 unless(isinstance(subpart, MyMessage))
2668
2669 def test_message_from_file_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002670 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002671 # Create a subclass
2672 class MyMessage(Message):
2673 pass
2674
2675 with openfile('msg_01.txt') as fp:
2676 msg = email.message_from_file(fp, MyMessage)
2677 unless(isinstance(msg, MyMessage))
2678 # Try something more complicated
2679 with openfile('msg_02.txt') as fp:
2680 msg = email.message_from_file(fp, MyMessage)
2681 for subpart in msg.walk():
2682 unless(isinstance(subpart, MyMessage))
2683
R David Murrayc27e5222012-05-25 15:01:48 -04002684 def test_custom_message_does_not_require_arguments(self):
2685 class MyMessage(Message):
2686 def __init__(self):
2687 super().__init__()
2688 msg = self._str_msg("Subject: test\n\ntest", MyMessage)
2689 self.assertTrue(isinstance(msg, MyMessage))
2690
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002691 def test__all__(self):
2692 module = __import__('email')
R David Murray1b6c7242012-03-16 22:43:05 -04002693 self.assertEqual(sorted(module.__all__), [
2694 'base64mime', 'charset', 'encoders', 'errors', 'feedparser',
2695 'generator', 'header', 'iterators', 'message',
2696 'message_from_binary_file', 'message_from_bytes',
2697 'message_from_file', 'message_from_string', 'mime', 'parser',
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002698 'quoprimime', 'utils',
2699 ])
2700
2701 def test_formatdate(self):
2702 now = time.time()
2703 self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
2704 time.gmtime(now)[:6])
2705
2706 def test_formatdate_localtime(self):
2707 now = time.time()
2708 self.assertEqual(
2709 utils.parsedate(utils.formatdate(now, localtime=True))[:6],
2710 time.localtime(now)[:6])
2711
2712 def test_formatdate_usegmt(self):
2713 now = time.time()
2714 self.assertEqual(
2715 utils.formatdate(now, localtime=False),
2716 time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
2717 self.assertEqual(
2718 utils.formatdate(now, localtime=False, usegmt=True),
2719 time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
2720
Georg Brandl1aca31e2012-09-22 09:03:56 +02002721 # parsedate and parsedate_tz will become deprecated interfaces someday
2722 def test_parsedate_returns_None_for_invalid_strings(self):
2723 self.assertIsNone(utils.parsedate(''))
2724 self.assertIsNone(utils.parsedate_tz(''))
2725 self.assertIsNone(utils.parsedate('0'))
2726 self.assertIsNone(utils.parsedate_tz('0'))
2727 self.assertIsNone(utils.parsedate('A Complete Waste of Time'))
2728 self.assertIsNone(utils.parsedate_tz('A Complete Waste of Time'))
2729 # Not a part of the spec but, but this has historically worked:
2730 self.assertIsNone(utils.parsedate(None))
2731 self.assertIsNone(utils.parsedate_tz(None))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002732
2733 def test_parsedate_compact(self):
2734 # The FWS after the comma is optional
2735 self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
2736 utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
2737
2738 def test_parsedate_no_dayofweek(self):
2739 eq = self.assertEqual
2740 eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
2741 (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
2742
2743 def test_parsedate_compact_no_dayofweek(self):
2744 eq = self.assertEqual
2745 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
2746 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2747
R. David Murray4a62e892010-12-23 20:35:46 +00002748 def test_parsedate_no_space_before_positive_offset(self):
2749 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
2750 (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
2751
2752 def test_parsedate_no_space_before_negative_offset(self):
2753 # Issue 1155362: we already handled '+' for this case.
2754 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
2755 (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))
2756
2757
R David Murrayaccd1c02011-03-13 20:06:23 -04002758 def test_parsedate_accepts_time_with_dots(self):
2759 eq = self.assertEqual
2760 eq(utils.parsedate_tz('5 Feb 2003 13.47.26 -0800'),
2761 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2762 eq(utils.parsedate_tz('5 Feb 2003 13.47 -0800'),
2763 (2003, 2, 5, 13, 47, 0, 0, 1, -1, -28800))
2764
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002765 def test_parsedate_acceptable_to_time_functions(self):
2766 eq = self.assertEqual
2767 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
2768 t = int(time.mktime(timetup))
2769 eq(time.localtime(t)[:6], timetup[:6])
2770 eq(int(time.strftime('%Y', timetup)), 2003)
2771 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
2772 t = int(time.mktime(timetup[:9]))
2773 eq(time.localtime(t)[:6], timetup[:6])
2774 eq(int(time.strftime('%Y', timetup[:9])), 2003)
2775
Alexander Belopolskya07548e2012-06-21 20:34:09 -04002776 def test_mktime_tz(self):
2777 self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
2778 -1, -1, -1, 0)), 0)
2779 self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
2780 -1, -1, -1, 1234)), -1234)
2781
R. David Murray219d1c82010-08-25 00:45:55 +00002782 def test_parsedate_y2k(self):
2783 """Test for parsing a date with a two-digit year.
2784
2785 Parsing a date with a two-digit year should return the correct
2786 four-digit year. RFC822 allows two-digit years, but RFC2822 (which
2787 obsoletes RFC822) requires four-digit years.
2788
2789 """
2790 self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
2791 utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
2792 self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
2793 utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
2794
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002795 def test_parseaddr_empty(self):
2796 self.assertEqual(utils.parseaddr('<>'), ('', ''))
2797 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
2798
2799 def test_noquote_dump(self):
2800 self.assertEqual(
2801 utils.formataddr(('A Silly Person', 'person@dom.ain')),
2802 'A Silly Person <person@dom.ain>')
2803
2804 def test_escape_dump(self):
2805 self.assertEqual(
2806 utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
R David Murrayb53319f2012-03-14 15:31:47 -04002807 r'"A (Very) Silly Person" <person@dom.ain>')
2808 self.assertEqual(
2809 utils.parseaddr(r'"A \(Very\) Silly Person" <person@dom.ain>'),
2810 ('A (Very) Silly Person', 'person@dom.ain'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002811 a = r'A \(Special\) Person'
2812 b = 'person@dom.ain'
2813 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2814
2815 def test_escape_backslashes(self):
2816 self.assertEqual(
2817 utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
2818 r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
2819 a = r'Arthur \Backslash\ Foobar'
2820 b = 'person@dom.ain'
2821 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2822
R David Murray8debacb2011-04-06 09:35:57 -04002823 def test_quotes_unicode_names(self):
2824 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2825 name = "H\u00e4ns W\u00fcrst"
2826 addr = 'person@dom.ain'
2827 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
2828 latin1_quopri = "=?iso-8859-1?q?H=E4ns_W=FCrst?= <person@dom.ain>"
2829 self.assertEqual(utils.formataddr((name, addr)), utf8_base64)
2830 self.assertEqual(utils.formataddr((name, addr), 'iso-8859-1'),
2831 latin1_quopri)
2832
2833 def test_accepts_any_charset_like_object(self):
2834 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2835 name = "H\u00e4ns W\u00fcrst"
2836 addr = 'person@dom.ain'
2837 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
2838 foobar = "FOOBAR"
2839 class CharsetMock:
2840 def header_encode(self, string):
2841 return foobar
2842 mock = CharsetMock()
2843 mock_expected = "%s <%s>" % (foobar, addr)
2844 self.assertEqual(utils.formataddr((name, addr), mock), mock_expected)
2845 self.assertEqual(utils.formataddr((name, addr), Charset('utf-8')),
2846 utf8_base64)
2847
2848 def test_invalid_charset_like_object_raises_error(self):
2849 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2850 name = "H\u00e4ns W\u00fcrst"
2851 addr = 'person@dom.ain'
2852 # A object without a header_encode method:
2853 bad_charset = object()
2854 self.assertRaises(AttributeError, utils.formataddr, (name, addr),
2855 bad_charset)
2856
2857 def test_unicode_address_raises_error(self):
2858 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2859 addr = 'pers\u00f6n@dom.in'
2860 self.assertRaises(UnicodeError, utils.formataddr, (None, addr))
2861 self.assertRaises(UnicodeError, utils.formataddr, ("Name", addr))
2862
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002863 def test_name_with_dot(self):
2864 x = 'John X. Doe <jxd@example.com>'
2865 y = '"John X. Doe" <jxd@example.com>'
2866 a, b = ('John X. Doe', 'jxd@example.com')
2867 self.assertEqual(utils.parseaddr(x), (a, b))
2868 self.assertEqual(utils.parseaddr(y), (a, b))
2869 # formataddr() quotes the name if there's a dot in it
2870 self.assertEqual(utils.formataddr((a, b)), y)
2871
R. David Murray5397e862010-10-02 15:58:26 +00002872 def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
2873 # issue 10005. Note that in the third test the second pair of
2874 # backslashes is not actually a quoted pair because it is not inside a
2875 # comment or quoted string: the address being parsed has a quoted
2876 # string containing a quoted backslash, followed by 'example' and two
2877 # backslashes, followed by another quoted string containing a space and
2878 # the word 'example'. parseaddr copies those two backslashes
2879 # literally. Per rfc5322 this is not technically correct since a \ may
2880 # not appear in an address outside of a quoted string. It is probably
2881 # a sensible Postel interpretation, though.
2882 eq = self.assertEqual
2883 eq(utils.parseaddr('""example" example"@example.com'),
2884 ('', '""example" example"@example.com'))
2885 eq(utils.parseaddr('"\\"example\\" example"@example.com'),
2886 ('', '"\\"example\\" example"@example.com'))
2887 eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
2888 ('', '"\\\\"example\\\\" example"@example.com'))
2889
R. David Murray63563cd2010-12-18 18:25:38 +00002890 def test_parseaddr_preserves_spaces_in_local_part(self):
2891 # issue 9286. A normal RFC5322 local part should not contain any
2892 # folding white space, but legacy local parts can (they are a sequence
2893 # of atoms, not dotatoms). On the other hand we strip whitespace from
2894 # before the @ and around dots, on the assumption that the whitespace
2895 # around the punctuation is a mistake in what would otherwise be
2896 # an RFC5322 local part. Leading whitespace is, usual, stripped as well.
2897 self.assertEqual(('', "merwok wok@xample.com"),
2898 utils.parseaddr("merwok wok@xample.com"))
2899 self.assertEqual(('', "merwok wok@xample.com"),
2900 utils.parseaddr("merwok wok@xample.com"))
2901 self.assertEqual(('', "merwok wok@xample.com"),
2902 utils.parseaddr(" merwok wok @xample.com"))
2903 self.assertEqual(('', 'merwok"wok" wok@xample.com'),
2904 utils.parseaddr('merwok"wok" wok@xample.com'))
2905 self.assertEqual(('', 'merwok.wok.wok@xample.com'),
2906 utils.parseaddr('merwok. wok . wok@xample.com'))
2907
R David Murrayb53319f2012-03-14 15:31:47 -04002908 def test_formataddr_does_not_quote_parens_in_quoted_string(self):
2909 addr = ("'foo@example.com' (foo@example.com)",
2910 'foo@example.com')
2911 addrstr = ('"\'foo@example.com\' '
2912 '(foo@example.com)" <foo@example.com>')
2913 self.assertEqual(utils.parseaddr(addrstr), addr)
2914 self.assertEqual(utils.formataddr(addr), addrstr)
2915
2916
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002917 def test_multiline_from_comment(self):
2918 x = """\
2919Foo
2920\tBar <foo@example.com>"""
2921 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
2922
2923 def test_quote_dump(self):
2924 self.assertEqual(
2925 utils.formataddr(('A Silly; Person', 'person@dom.ain')),
2926 r'"A Silly; Person" <person@dom.ain>')
2927
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002928 def test_charset_richcomparisons(self):
2929 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002930 ne = self.assertNotEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002931 cset1 = Charset()
2932 cset2 = Charset()
2933 eq(cset1, 'us-ascii')
2934 eq(cset1, 'US-ASCII')
2935 eq(cset1, 'Us-AsCiI')
2936 eq('us-ascii', cset1)
2937 eq('US-ASCII', cset1)
2938 eq('Us-AsCiI', cset1)
2939 ne(cset1, 'usascii')
2940 ne(cset1, 'USASCII')
2941 ne(cset1, 'UsAsCiI')
2942 ne('usascii', cset1)
2943 ne('USASCII', cset1)
2944 ne('UsAsCiI', cset1)
2945 eq(cset1, cset2)
2946 eq(cset2, cset1)
2947
2948 def test_getaddresses(self):
2949 eq = self.assertEqual
2950 eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
2951 'Bud Person <bperson@dom.ain>']),
2952 [('Al Person', 'aperson@dom.ain'),
2953 ('Bud Person', 'bperson@dom.ain')])
2954
2955 def test_getaddresses_nasty(self):
2956 eq = self.assertEqual
2957 eq(utils.getaddresses(['foo: ;']), [('', '')])
2958 eq(utils.getaddresses(
2959 ['[]*-- =~$']),
2960 [('', ''), ('', ''), ('', '*--')])
2961 eq(utils.getaddresses(
2962 ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
2963 [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
2964
2965 def test_getaddresses_embedded_comment(self):
2966 """Test proper handling of a nested comment"""
2967 eq = self.assertEqual
2968 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
2969 eq(addrs[0][1], 'foo@bar.com')
2970
2971 def test_utils_quote_unquote(self):
2972 eq = self.assertEqual
2973 msg = Message()
2974 msg.add_header('content-disposition', 'attachment',
2975 filename='foo\\wacky"name')
2976 eq(msg.get_filename(), 'foo\\wacky"name')
2977
2978 def test_get_body_encoding_with_bogus_charset(self):
2979 charset = Charset('not a charset')
2980 self.assertEqual(charset.get_body_encoding(), 'base64')
2981
2982 def test_get_body_encoding_with_uppercase_charset(self):
2983 eq = self.assertEqual
2984 msg = Message()
2985 msg['Content-Type'] = 'text/plain; charset=UTF-8'
2986 eq(msg['content-type'], 'text/plain; charset=UTF-8')
2987 charsets = msg.get_charsets()
2988 eq(len(charsets), 1)
2989 eq(charsets[0], 'utf-8')
2990 charset = Charset(charsets[0])
2991 eq(charset.get_body_encoding(), 'base64')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00002992 msg.set_payload(b'hello world', charset=charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002993 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
2994 eq(msg.get_payload(decode=True), b'hello world')
2995 eq(msg['content-transfer-encoding'], 'base64')
2996 # Try another one
2997 msg = Message()
2998 msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
2999 charsets = msg.get_charsets()
3000 eq(len(charsets), 1)
3001 eq(charsets[0], 'us-ascii')
3002 charset = Charset(charsets[0])
3003 eq(charset.get_body_encoding(), encoders.encode_7or8bit)
3004 msg.set_payload('hello world', charset=charset)
3005 eq(msg.get_payload(), 'hello world')
3006 eq(msg['content-transfer-encoding'], '7bit')
3007
3008 def test_charsets_case_insensitive(self):
3009 lc = Charset('us-ascii')
3010 uc = Charset('US-ASCII')
3011 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
3012
def test_partial_falls_inside_message_delivery_status(self):
    # The Parser interface provides chunks of data to FeedParser in 8192
    # byte gulps.  SF bug #1076485 found one of those chunks inside
    # message/delivery-status header block, which triggered an
    # unreadline() of NeedMoreData.
    parsed = self._msgobj('msg_43.txt')
    outline = StringIO()
    iterators._structure(parsed, outline)
    # Expected outline: a report holding a text part, a delivery-status
    # part with 26 text/plain children, and the returned headers.
    # NOTE(review): four spaces per nesting level is assumed here (the
    # indentation _structure() emits) -- confirm against the fixture.
    expected_lines = (
        ['multipart/report',
         '    text/plain',
         '    message/delivery-status']
        + ['        text/plain'] * 26
        + ['    text/rfc822-headers',
           ''])
    self.ndiffAssertEqual(outline.getvalue(), '\n'.join(expected_lines))
3054
R. David Murraya0b44b52010-12-02 21:47:19 +00003055 def test_make_msgid_domain(self):
3056 self.assertEqual(
3057 email.utils.make_msgid(domain='testdomain-string')[-19:],
3058 '@testdomain-string>')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003059
Ezio Melottib3aedd42010-11-20 19:04:17 +00003060
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003061# Test the iterator/generators
class TestIterators(TestEmailBase):
    # Tests for the helpers in email.iterators, plus a regression test for
    # the feed parser's line buffering.

    def test_body_line_iterator(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # First a simple non-multipart message
        msg = self._msgobj('msg_01.txt')
        it = iterators.body_line_iterator(msg)
        lines = list(it)
        eq(len(lines), 6)
        neq(EMPTYSTRING.join(lines), msg.get_payload())
        # Now a more complicated multipart
        msg = self._msgobj('msg_02.txt')
        it = iterators.body_line_iterator(msg)
        lines = list(it)
        eq(len(lines), 43)
        with openfile('msg_19.txt') as fp:
            neq(EMPTYSTRING.join(lines), fp.read())

    def test_typed_subpart_iterator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_04.txt')
        # Collect the payload of every subpart whose main type is "text".
        payloads = [subpart.get_payload()
                    for subpart in iterators.typed_subpart_iterator(msg, 'text')]
        eq(len(payloads), 2)
        eq(EMPTYSTRING.join(payloads), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_03.txt')
        # Restrict the iteration to text/plain subparts.
        payloads = [subpart.get_payload()
                    for subpart in
                    iterators.typed_subpart_iterator(msg, 'text', 'plain')]
        eq(len(payloads), 1)
        eq(EMPTYSTRING.join(payloads), """\

Hi,

Do you like this message?

-Me
""")

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
           line endings.  A CR ending one push() followed by a LF starting
           the next push() added an empty line.
        '''
        from email.feedparser import BufferedSubFile, NeedMoreData
        # Each entry is (chunk to push, number of lines expected to become
        # readable right after that push).
        imt = [
            ("a\r \n", 2),
            ("b", 0),
            ("c\n", 1),
            ("", 0),
            ("d\r\n", 1),
            ("e\r", 0),
            ("\nf", 1),
            ("\r\n", 1),
        ]
        bsf = BufferedSubFile()
        om = []
        nt = 0
        for il, n in imt:
            bsf.push(il)
            nt += n
            n1 = 0
            while True:
                ol = bsf.readline()
                # NeedMoreData is a sentinel object: test identity, not
                # equality.
                if ol is NeedMoreData:
                    break
                om.append(ol)
                n1 += 1
            # assertEqual (rather than assertTrue on a comparison) reports
            # both values when the check fails.
            self.assertEqual(n, n1)
        self.assertEqual(len(om), nt)
        # Nothing may be lost or invented: the lines read back must
        # reassemble into exactly what was pushed.
        self.assertEqual(''.join(il for il, n in imt), ''.join(om))
3148
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003149
Ezio Melottib3aedd42010-11-20 19:04:17 +00003150
class TestParsers(TestEmailBase):
    # Tests for Parser, HeaderParser, BytesHeaderParser and the
    # email.message_from_* helpers on well-formed and slightly odd input.

    maxDiff = None

    def test_header_parser(self):
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt') as fp:
            msg = HeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        # Only the headers were parsed, so the body is one opaque string.
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)

    def test_bytes_header_parser(self):
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt', 'rb') as fp:
            msg = email.parser.BytesHeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)
        self.assertIsInstance(msg.get_payload(decode=True), bytes)

    def test_whitespace_continuation(self):
        eq = self.assertEqual
        # This message contains a line after the Subject: header that has only
        # whitespace, but it is not empty!
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: the next line has a space on it
\x20
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        eq = self.assertEqual
        # Like the previous test, but the subject line is the last
        # header.
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam
Subject: the next line has a space on it
\x20

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        eq = self.assertEqual
        # newline='\n' keeps the CRLF line endings of the input file.
        with openfile('msg_26.txt', newline='\n') as fp:
            msg = Parser().parse(fp)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'text/plain')
        eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'application/riscos')

    def test_crlf_flatten(self):
        # Using newline='\n' preserves the crlfs in this input file.
        with openfile('msg_26.txt', newline='\n') as fp:
            text = fp.read()
        msg = email.message_from_string(text)
        s = StringIO()
        g = Generator(s)
        g.flatten(msg, linesep='\r\n')
        self.assertEqual(s.getvalue(), text)

    def test_multipart_digest_with_extra_mime_headers(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 2)
        # Both digest entries have the same shape; only the body differs.
        for index, body in enumerate(('message 1\n', 'message 2\n')):
            wrapper = msg.get_payload(index)
            eq(wrapper.get_content_type(), 'message/rfc822')
            self.assertTrue(wrapper.is_multipart())
            eq(len(wrapper.get_payload()), 1)
            inner = wrapper.get_payload(0)
            self.assertFalse(inner.is_multipart())
            eq(inner.get_content_type(), 'text/plain')
            neq(inner.get_payload(), body)

    def test_three_lines(self):
        # A bug report by Andrew McNamara
        lines = ['From: Andrew Person <aperson@dom.ain',
                 'Subject: Test',
                 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
        msg = email.message_from_string(NL.join(lines))
        self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        eq = self.assertEqual
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        value1 = 'text'
        value2 = 'more text'
        m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            value1, value2)
        msg = email.message_from_string(m)
        eq(msg.get('Header'), value1)
        eq(msg.get('Next-Header'), value2)

    def test_rfc2822_header_syntax(self):
        eq = self.assertEqual
        m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg), 3)
        # Iterating a message yields its header field names.
        eq(sorted(msg), ['!"#QUX;~', '>From', 'From'])
        eq(msg.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        eq = self.assertEqual
        m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg.keys()), 0)

    def test_rfc2822_one_character_header(self):
        eq = self.assertEqual
        m = 'A: first header\nB: second header\nCC: third header\n\nbody'
        msg = email.message_from_string(m)
        # sorted() works whatever kind of iterable keys() returns.
        eq(sorted(msg.keys()), ['A', 'B', 'CC'])
        eq(msg.get_payload(), 'body')

    def test_CRLFLF_at_end_of_part(self):
        # issue 5610: feedparser should not eat two chars from body part ending
        # with "\r\n\n".
        m = (
            "From: foo@bar.com\n"
            "To: baz\n"
            "Mime-Version: 1.0\n"
            "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
            "\n"
            "--BOUNDARY\n"
            "Content-Type: text/plain\n"
            "\n"
            "body ending with CRLF newline\r\n"
            "\n"
            "--BOUNDARY--\n"
          )
        msg = email.message_from_string(m)
        self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003326
Ezio Melottib3aedd42010-11-20 19:04:17 +00003327
class Test8BitBytesHandling(unittest.TestCase):
    # Python 3 feeds the parser text, which cannot directly represent input
    # that uses an 8bit transfer encoding.  As a workaround, email 5.1
    # decodes byte streams with the surrogateescape error handler and turns
    # the surrogates back into bytes at appropriate places.  Headers holding
    # 8bit bytes cannot be transformed this way (they get munged), but they
    # can be parsed and preserved, and body parts with an 8bit CTE can be
    # decoded.

    maxDiff = None

    # Template for a single-part message; the tests fill in the charset,
    # the content transfer encoding and one body line.
    bodytest_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: baz
        Mime-Version: 1.0
        Content-Type: text/plain; charset={charset}
        Content-Transfer-Encoding: {cte}

        {bodyline}
        """)

    def _parse_body(self, **fields):
        # Fill in the body template, encode it as UTF-8 and parse the bytes.
        raw = self.bodytest_msg.format(**fields).encode('utf-8')
        return email.message_from_bytes(raw)

    def _rfc2231_message(self):
        # Message whose RFC 2231 encoded "title" parameter carries a raw
        # 0xA7 byte.
        return email.message_from_bytes(textwrap.dedent("""\
            Content-Type: text/plain; charset=us-ascii;
             title*=us-ascii'en'This%20is%20not%20f\xa7n"""
            ).encode('latin-1'))

    def test_known_8bit_CTE(self):
        parsed = self._parse_body(charset='utf-8',
                                  cte='8bit',
                                  bodyline='pöstal')
        self.assertEqual(parsed.get_payload(), "pöstal\n")
        self.assertEqual(parsed.get_payload(decode=True),
                         "pöstal\n".encode('utf-8'))

    def test_unknown_8bit_CTE(self):
        parsed = self._parse_body(charset='notavalidcharset',
                                  cte='8bit',
                                  bodyline='pöstal')
        self.assertEqual(parsed.get_payload(), "p\uFFFD\uFFFDstal\n")
        self.assertEqual(parsed.get_payload(decode=True),
                         "pöstal\n".encode('utf-8'))

    def test_8bit_in_quopri_body(self):
        # The data here is not RFC compliant.  Without 'decode' the library
        # decodes the body with the charset named in the headers, which
        # works because the source bytes really are utf-8.  Against real
        # dirty data this is likely to produce mojibake, but since the data
        # is invalid anyway it is as good a guess as any.  That half of the
        # test therefore only records the current behavior, which is not
        # necessarily the best possible one.  With 'decode' the raw bytes
        # are returned, so that half should test correct behavior, or at
        # least produce the same result email4 did.
        parsed = self._parse_body(charset='utf-8',
                                  cte='quoted-printable',
                                  bodyline='p=C3=B6stál')
        self.assertEqual(parsed.get_payload(), 'p=C3=B6stál\n')
        self.assertEqual(parsed.get_payload(decode=True),
                         'pöstál\n'.encode('utf-8'))

    def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
        # Like the previous test, but shows that an 8bit byte which cannot
        # be decoded in the declared charset is replaced by the unicode
        # 'unknown' character.  Again, this may or may not be the ideal
        # behavior.  With decode=False none of the decoders get involved,
        # so this is the only test needed for this behavior.
        parsed = self._parse_body(charset='ascii',
                                  cte='quoted-printable',
                                  bodyline='p=C3=B6stál')
        self.assertEqual(parsed.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
        self.assertEqual(parsed.get_payload(decode=True),
                         'pöstál\n'.encode('utf-8'))

    # test_defect_handling:test_invalid_chars_in_base64_payload
    def test_8bit_in_base64_body(self):
        # 8bit bytes inside a base64 body lie outside the base64 alphabet,
        # so they are ignored while decoding -- but a defect is registered.
        parsed = self._parse_body(charset='utf-8',
                                  cte='base64',
                                  bodyline='cMO2c3RhbAá=')
        self.assertEqual(parsed.get_payload(decode=True),
                         'pöstal'.encode('utf-8'))
        first_defect = parsed.defects[0]
        self.assertIsInstance(first_defect,
                              errors.InvalidBase64CharactersDefect)

    def test_8bit_in_uuencode_body(self):
        # An 8bit byte in a uuencode block makes it undecodable by normal
        # means, so the block comes back undecoded, but as bytes.
        parsed = self._parse_body(charset='utf-8',
                                  cte='uuencode',
                                  bodyline='<,.V<W1A; á ')
        self.assertEqual(parsed.get_payload(decode=True),
                         '<,.V<W1A; á \n'.encode('utf-8'))

    # Pairs of (raw header line, (field name, value as it is printed)).
    headertest_headers = (
        ('From: foo@bar.com', ('From', 'foo@bar.com')),
        ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
        ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
            '\tJean de Baddie',
            ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
                        'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
                        ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
        ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
        )
    headertest_msg = ('\n'.join(raw for raw, _ in headertest_headers) +
        '\nYes, they are flying.\n').encode('utf-8')

    def test_get_8bit_header(self):
        parsed = email.message_from_bytes(self.headertest_msg)
        for value in (parsed.get('to'), parsed['to']):
            self.assertEqual(str(value), 'b\uFFFD\uFFFDz')

    def test_print_8bit_headers(self):
        parsed = email.message_from_bytes(self.headertest_msg)
        printed_values = [pair[1][1] for pair in self.headertest_headers]
        template = textwrap.dedent("""\
            From: {}
            To: {}
            Subject: {}
            From: {}

            Yes, they are flying.
            """)
        self.assertEqual(str(parsed), template.format(*printed_values))

    def test_values_with_8bit_headers(self):
        parsed = email.message_from_bytes(self.headertest_msg)
        values = [str(value) for value in parsed.values()]
        self.assertListEqual(values,
                             ['foo@bar.com',
                              'b\uFFFD\uFFFDz',
                              'Maintenant je vous pr\uFFFD\uFFFDsente mon '
                                  'coll\uFFFD\uFFFDgue, le pouf '
                                  'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
                                  '\tJean de Baddie',
                              "g\uFFFD\uFFFDst"])

    def test_items_with_8bit_headers(self):
        parsed = email.message_from_bytes(self.headertest_msg)
        pairs = [(str(name), str(value)) for (name, value) in parsed.items()]
        self.assertListEqual(pairs,
                             [('From', 'foo@bar.com'),
                              ('To', 'b\uFFFD\uFFFDz'),
                              ('Subject', 'Maintenant je vous '
                                          'pr\uFFFD\uFFFDsente '
                                          'mon coll\uFFFD\uFFFDgue, le pouf '
                                          'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
                                          '\tJean de Baddie'),
                              ('From', 'g\uFFFD\uFFFDst')])

    def test_get_all_with_8bit_headers(self):
        parsed = email.message_from_bytes(self.headertest_msg)
        senders = [str(value) for value in parsed.get_all('from')]
        self.assertListEqual(senders,
                             ['foo@bar.com',
                              'g\uFFFD\uFFFDst'])

    def test_get_content_type_with_8bit(self):
        raw = textwrap.dedent("""\
            Content-Type: text/pl\xA7in; charset=utf-8
            """).encode('latin-1')
        parsed = email.message_from_bytes(raw)
        self.assertEqual(parsed.get_content_type(), "text/pl\uFFFDin")
        self.assertEqual(parsed.get_content_maintype(), "text")
        self.assertEqual(parsed.get_content_subtype(), "pl\uFFFDin")

    # test_headerregistry.TestContentTypeHeader.non_ascii_in_params
    def test_get_params_with_8bit(self):
        raw = 'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1')
        parsed = email.message_from_bytes(raw)
        self.assertEqual(parsed.get_params(header='x-header'),
           [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])
        self.assertEqual(parsed.get_param('Foo', header='x-header'),
                         '\uFFFdne')
        # XXX: someday you might be able to get 'b\xa7r', for now you can't.
        self.assertEqual(parsed.get_param('b\xa7r', header='x-header'), None)

    # test_headerregistry.TestContentTypeHeader.non_ascii_in_rfc2231_value
    def test_get_rfc2231_params_with_8bit(self):
        parsed = self._rfc2231_message()
        self.assertEqual(parsed.get_param('title'),
            ('us-ascii', 'en', 'This is not f\uFFFDn'))

    def test_set_rfc2231_params_with_8bit(self):
        parsed = self._rfc2231_message()
        parsed.set_param('title', 'test')
        self.assertEqual(parsed.get_param('title'), 'test')

    def test_del_rfc2231_params_with_8bit(self):
        parsed = self._rfc2231_message()
        parsed.del_param('title')
        self.assertEqual(parsed.get_param('title'), None)
        self.assertEqual(parsed.get_content_maintype(), 'text')

    def test_get_payload_with_8bit_cte_header(self):
        raw = textwrap.dedent("""\
            Content-Transfer-Encoding: b\xa7se64
            Content-Type: text/plain; charset=latin-1

            payload
            """).encode('latin-1')
        parsed = email.message_from_bytes(raw)
        self.assertEqual(parsed.get_payload(), 'payload\n')
        self.assertEqual(parsed.get_payload(decode=True), b'payload\n')

    # UTF-8 encoded message with non-latin-1 text in the body and raw 8bit
    # bytes in two headers.
    non_latin_bin_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: báz
        Subject: Maintenant je vous présente mon collègue, le pouf célèbre
        \tJean de Baddie
        Mime-Version: 1.0
        Content-Type: text/plain; charset="utf-8"
        Content-Transfer-Encoding: 8bit

        Да, они летят.
        """).encode('utf-8')

    def test_bytes_generator(self):
        parsed = email.message_from_bytes(self.non_latin_bin_msg)
        sink = BytesIO()
        generator = email.generator.BytesGenerator(sink)
        generator.flatten(parsed)
        self.assertEqual(sink.getvalue(), self.non_latin_bin_msg)

    def test_bytes_generator_handles_None_body(self):
        #Issue 11019
        empty = email.message.Message()
        sink = BytesIO()
        generator = email.generator.BytesGenerator(sink)
        generator.flatten(empty)
        self.assertEqual(sink.getvalue(), b"\n")

    # The message above as the string Generator is expected to write it,
    # with the long Subject header wrapped.
    non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\
        From: foo@bar.com
        To: =?unknown-8bit?q?b=C3=A1z?=
        Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=
         =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=
         =?unknown-8bit?q?_Jean_de_Baddie?=
        Mime-Version: 1.0
        Content-Type: text/plain; charset="utf-8"
        Content-Transfer-Encoding: base64

        0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
        """)

    def test_generator_handles_8bit(self):
        parsed = email.message_from_bytes(self.non_latin_bin_msg)
        sink = StringIO()
        generator = email.generator.Generator(sink)
        generator.flatten(parsed)
        self.assertEqual(sink.getvalue(),
                         self.non_latin_bin_msg_as7bit_wrapped)

    def test_bytes_generator_with_unix_from(self):
        # The unixfrom line carries the current date, so it cannot be
        # checked literally.  Check only that its first word is 'From' and
        # that the remainder of the output equals the input.
        parsed = email.message_from_bytes(self.non_latin_bin_msg)
        sink = BytesIO()
        generator = email.generator.BytesGenerator(sink)
        generator.flatten(parsed, unixfrom=True)
        envelope, *remainder = sink.getvalue().split(b'\n')
        self.assertEqual(envelope.split()[0], b'From')
        self.assertEqual(b'\n'.join(remainder), self.non_latin_bin_msg)

    # Same as the wrapped form, except that the first two lines of the
    # Subject header are joined into one.
    non_latin_bin_msg_as7bit = '\n'.join(
        non_latin_bin_msg_as7bit_wrapped.split('\n')[:2]
        + ['Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
           'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
        + non_latin_bin_msg_as7bit_wrapped.split('\n')[4:])

    def test_message_from_binary_file(self):
        path = 'test.msg'
        self.addCleanup(unlink, path)
        with open(path, 'wb') as writer:
            writer.write(self.non_latin_bin_msg)
        with open(path, 'rb') as reader:
            parsed = email.parser.BytesParser().parse(reader)
        self.assertEqual(str(parsed), self.non_latin_bin_msg_as7bit)

    # latin-1 encoded message with an 8bit body.
    latin_bin_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: Dinsdale
        Subject: Nudge nudge, wink, wink
        Mime-Version: 1.0
        Content-Type: text/plain; charset="latin-1"
        Content-Transfer-Encoding: 8bit

        oh là là, know what I mean, know what I mean?
        """).encode('latin-1')

    # The same message as str() is expected to render it.
    latin_bin_msg_as7bit = textwrap.dedent("""\
        From: foo@bar.com
        To: Dinsdale
        Subject: Nudge nudge, wink, wink
        Mime-Version: 1.0
        Content-Type: text/plain; charset="iso-8859-1"
        Content-Transfer-Encoding: quoted-printable

        oh l=E0 l=E0, know what I mean, know what I mean?
        """)

    def test_string_generator_reencodes_to_quopri_when_appropriate(self):
        parsed = email.message_from_bytes(self.latin_bin_msg)
        self.assertEqual(str(parsed), self.latin_bin_msg_as7bit)

    def test_decoded_generator_emits_unicode_body(self):
        parsed = email.message_from_bytes(self.latin_bin_msg)
        sink = StringIO()
        email.generator.DecodedGenerator(sink).flatten(parsed)
        #DecodedHeader output contains an extra blank line compared
        #to the input message.  RDM: not sure if this is a bug or not,
        #but it is not specific to the 8bit->7bit conversion.
        expected = self.latin_bin_msg.decode('latin-1') + '\n'
        self.assertEqual(sink.getvalue(), expected)

    def test_bytes_feedparser(self):
        feeder = email.feedparser.BytesFeedParser()
        # Feed the message in ten-byte slices.
        for start in range(0, len(self.latin_bin_msg), 10):
            feeder.feed(self.latin_bin_msg[start:start+10])
        parsed = feeder.close()
        self.assertEqual(str(parsed), self.latin_bin_msg_as7bit)

    def test_crlf_flatten(self):
        with openfile('msg_26.txt', 'rb') as fp:
            original = fp.read()
        parsed = email.message_from_bytes(original)
        sink = BytesIO()
        generator = email.generator.BytesGenerator(sink)
        generator.flatten(parsed, linesep='\r\n')
        self.assertEqual(sink.getvalue(), original)

    def test_8bit_multipart(self):
        # Issue 11605
        source = textwrap.dedent("""\
            Date: Fri, 18 Mar 2011 17:15:43 +0100
            To: foo@example.com
            From: foodwatch-Newsletter <bar@example.com>
            Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System
            Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain>
            MIME-Version: 1.0
            Content-Type: multipart/alternative;
                    boundary="b1_76a486bee62b0d200f33dc2ca08220ad"

            --b1_76a486bee62b0d200f33dc2ca08220ad
            Content-Type: text/plain; charset="utf-8"
            Content-Transfer-Encoding: 8bit

            Guten Tag, ,

            mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die
            Nachrichten aus Japan.


            --b1_76a486bee62b0d200f33dc2ca08220ad
            Content-Type: text/html; charset="utf-8"
            Content-Transfer-Encoding: 8bit

            <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
                "http://www.w3.org/TR/html4/loose.dtd">
            <html lang="de">
            <head>
                    <title>foodwatch - Newsletter</title>
            </head>
            <body>
              <p>mit gro&szlig;er Betroffenheit verfolgen auch wir im foodwatch-Team
                 die Nachrichten aus Japan.</p>
            </body>
            </html>
            --b1_76a486bee62b0d200f33dc2ca08220ad--

            """).encode('utf-8')
        parsed = email.message_from_bytes(source)
        sink = BytesIO()
        generator = email.generator.BytesGenerator(sink)
        generator.flatten(parsed)
        self.assertEqual(sink.getvalue(), source)

    def test_bytes_generator_b_encoding_linesep(self):
        # Issue 14062: b encoding was tacking on an extra \n.
        message = Message()
        # This has enough non-ascii that it should always end up b encoded.
        message['Subject'] = Header('žluťoučký kůň')
        sink = BytesIO()
        generator = email.generator.BytesGenerator(sink)
        generator.flatten(message, linesep='\r\n')
        self.assertEqual(
            sink.getvalue(),
            b'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')

    def test_generator_b_encoding_linesep(self):
        # Since this broke in ByteGenerator, test Generator for completeness.
        message = Message()
        # This has enough non-ascii that it should always end up b encoded.
        message['Subject'] = Header('žluťoučký kůň')
        sink = StringIO()
        generator = email.generator.Generator(sink)
        generator.flatten(message, linesep='\r\n')
        self.assertEqual(
            sink.getvalue(),
            'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
3730
Ezio Melottib3aedd42010-11-20 19:04:17 +00003731
class BaseTestBytesGeneratorIdempotent:
    # Mixin supplying bytes-based _msgobj() and _idempotent() helpers.
    # The concrete class must define ``linesep`` (str), ``blinesep``
    # (bytes) and ``normalize_linesep_regex`` (compiled bytes pattern).

    maxDiff = None

    def _msgobj(self, filename):
        # Read the fixture as bytes, rewrite its line endings to this
        # class's separator, and return (parsed message, normalized bytes).
        with openfile(filename, 'rb') as fp:
            raw = fp.read()
        normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(normalized), normalized

    def _idempotent(self, msg, data, unixfrom=False):
        # Flatten msg without header rewrapping (maxheaderlen=0) and
        # require the output to equal the bytes it was parsed from.
        sink = BytesIO()
        generator = email.generator.BytesGenerator(sink, maxheaderlen=0)
        generator.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        flattened = sink.getvalue()
        self.assertEqual(data, flattened)
R. David Murray96fd54e2010-10-08 15:55:28 +00003748
3749
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                     TestIdempotent):
    # Runs the idempotency tests with LF line endings: every CRLF in the
    # fixture bytes is rewritten to a bare LF before parsing.
    normalize_linesep_regex = re.compile(br'\r\n')
    blinesep = b'\n'
    linesep = '\n'
3755
3756
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    # Runs the idempotency tests with CRLF line endings: every LF not
    # already preceded by CR is rewritten to CRLF before parsing.
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
    blinesep = b'\r\n'
    linesep = '\r\n'
3762
Ezio Melottib3aedd42010-11-20 19:04:17 +00003763
class TestBase64(unittest.TestCase):
    # Tests for the helpers in email.base64mime.

    def test_len(self):
        check = self.assertEqual
        check(base64mime.header_length('hello'),
              len(base64mime.body_encode(b'hello', eol='')))
        for size in range(15):
            # Every started group of three input characters takes four
            # characters of output.
            expected = 4 * ((size + 2) // 3)
            check(base64mime.header_length('x' * size), expected)

    def test_decode(self):
        decode = base64mime.decode
        self.assertEqual(decode(''), b'')
        self.assertEqual(decode('aGVsbG8='), b'hello')

    def test_encode(self):
        check = self.assertEqual
        encode = base64mime.body_encode
        check(encode(b''), b'')
        check(encode(b'hello'), 'aGVsbG8=\n')
        # Input that itself ends in a newline
        check(encode(b'hello\n'), 'aGVsbG8K\n')
        payload = b'xxxx ' * 20
        wrapped = ('eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg',) * 3 + (
            'eHh4eCB4eHh4IA==',)
        # Test the maxlinelen arg
        check(encode(payload, maxlinelen=40),
              ''.join(line + '\n' for line in wrapped))
        # Test the eol argument
        check(encode(payload, maxlinelen=40, eol='\r\n'),
              ''.join(line + '\r\n' for line in wrapped))

    def test_header_encode(self):
        he = base64mime.header_encode
        # (text, keyword arguments, expected encoded word)
        cases = [
            ('hello', {}, '=?iso-8859-1?b?aGVsbG8=?='),
            ('hello\r\nworld', {}, '=?iso-8859-1?b?aGVsbG8NCndvcmxk?='),
            ('hello\nworld', {}, '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?='),
            # Test the charset option
            ('hello', {'charset': 'iso-8859-2'}, '=?iso-8859-2?b?aGVsbG8=?='),
            ('hello\nworld', {}, '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?='),
        ]
        for text, options, expected in cases:
            self.assertEqual(he(text, **options), expected)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003814
3815
Ezio Melottib3aedd42010-11-20 19:04:17 +00003816
class TestQuopri(unittest.TestCase):
    """Tests for the quoted-printable helpers in email.quoprimime."""

    def setUp(self):
        # Set of characters (as byte integers) that don't need to be encoded
        # in headers.
        self.hlit = list(chain(
            range(ord('a'), ord('z') + 1),
            range(ord('A'), ord('Z') + 1),
            range(ord('0'), ord('9') + 1),
            b'!*+-/'))
        # Set of characters (as byte integers) that do need to be encoded in
        # headers.
        self.hnon = [c for c in range(256) if c not in self.hlit]
        # Sanity check: the two lists must partition all 256 byte values.
        self.assertEqual(len(self.hlit) + len(self.hnon), 256)
        # Set of characters (as byte integers) that don't need to be encoded
        # in bodies.
        self.blit = list(range(ord(' '), ord('~') + 1))
        self.blit.append(ord('\t'))
        self.blit.remove(ord('='))
        # Set of characters (as byte integers) that do need to be encoded in
        # bodies.
        self.bnon = [c for c in range(256) if c not in self.blit]
        self.assertEqual(len(self.blit) + len(self.bnon), 256)

    def test_quopri_header_check(self):
        for c in self.hlit:
            self.assertFalse(quoprimime.header_check(c),
                        'Should not be header quopri encoded: %s' % chr(c))
        for c in self.hnon:
            self.assertTrue(quoprimime.header_check(c),
                            'Should be header quopri encoded: %s' % chr(c))

    def test_quopri_body_check(self):
        for c in self.blit:
            self.assertFalse(quoprimime.body_check(c),
                        'Should not be body quopri encoded: %s' % chr(c))
        for c in self.bnon:
            self.assertTrue(quoprimime.body_check(c),
                            'Should be body quopri encoded: %s' % chr(c))

    def test_header_quopri_len(self):
        eq = self.assertEqual
        eq(quoprimime.header_length(b'hello'), 5)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
           quoprimime.header_length(b'hello') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
           quoprimime.header_length(b'h@e@l@l@o@') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        for c in self.hlit:
            eq(quoprimime.header_length(bytes([c])), 1,
               'expected length 1 for %r' % chr(c))
        for c in self.hnon:
            # Space is special; it's encoded to _
            if c == ord(' '):
                continue
            eq(quoprimime.header_length(bytes([c])), 3,
               'expected length 3 for %r' % chr(c))
        eq(quoprimime.header_length(b' '), 1)

    def test_body_quopri_len(self):
        eq = self.assertEqual
        for c in self.blit:
            eq(quoprimime.body_length(bytes([c])), 1)
        for c in self.bnon:
            eq(quoprimime.body_length(bytes([c])), 3)

    def test_quote_unquote_idempotent(self):
        # quote() followed by unquote() must round-trip every code point
        # in the 8-bit range.
        for x in range(256):
            c = chr(x)
            self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)

    def _test_header_encode(self, header, expected_encoded_header, charset=None):
        """Check header_encode(); charset=None means use its default."""
        if charset is None:
            encoded_header = quoprimime.header_encode(header)
        else:
            encoded_header = quoprimime.header_encode(header, charset)
        self.assertEqual(encoded_header, expected_encoded_header)

    def test_header_encode_null(self):
        self._test_header_encode(b'', '')

    def test_header_encode_one_word(self):
        self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=')

    def test_header_encode_two_lines(self):
        self._test_header_encode(b'hello\nworld',
                                '=?iso-8859-1?q?hello=0Aworld?=')

    def test_header_encode_non_ascii(self):
        self._test_header_encode(b'hello\xc7there',
                                '=?iso-8859-1?q?hello=C7there?=')

    def test_header_encode_alt_charset(self):
        self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
                charset='iso-8859-2')

    def _test_header_decode(self, encoded_header, expected_decoded_header):
        """Check that header_decode() yields the expected text."""
        decoded_header = quoprimime.header_decode(encoded_header)
        self.assertEqual(decoded_header, expected_decoded_header)

    def test_header_decode_null(self):
        self._test_header_decode('', '')

    def test_header_decode_one_word(self):
        self._test_header_decode('hello', 'hello')

    def test_header_decode_two_lines(self):
        self._test_header_decode('hello=0Aworld', 'hello\nworld')

    def test_header_decode_non_ascii(self):
        self._test_header_decode('hello=C7there', 'hello\xc7there')

    def _test_decode(self, encoded, expected_decoded, eol=None):
        """Check decode(); eol=None means use decode()'s default."""
        if eol is None:
            decoded = quoprimime.decode(encoded)
        else:
            decoded = quoprimime.decode(encoded, eol=eol)
        self.assertEqual(decoded, expected_decoded)

    def test_decode_null_word(self):
        self._test_decode('', '')

    def test_decode_null_line_null_word(self):
        self._test_decode('\r\n', '\n')

    def test_decode_one_word(self):
        self._test_decode('hello', 'hello')

    def test_decode_one_word_eol(self):
        self._test_decode('hello', 'hello', eol='X')

    def test_decode_one_line(self):
        self._test_decode('hello\r\n', 'hello\n')

    def test_decode_one_line_lf(self):
        self._test_decode('hello\n', 'hello\n')

    def test_decode_one_line_cr(self):
        self._test_decode('hello\r', 'hello\n')

    def test_decode_one_line_nl(self):
        self._test_decode('hello\n', 'helloX', eol='X')

    def test_decode_one_line_crnl(self):
        self._test_decode('hello\r\n', 'helloX', eol='X')

    def test_decode_one_line_one_word(self):
        self._test_decode('hello\r\nworld', 'hello\nworld')

    def test_decode_one_line_one_word_eol(self):
        self._test_decode('hello\r\nworld', 'helloXworld', eol='X')

    def test_decode_two_lines(self):
        self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n')

    def test_decode_two_lines_eol(self):
        self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X')

    def test_decode_one_long_line(self):
        self._test_decode('Spam' * 250, 'Spam' * 250)

    def test_decode_one_space(self):
        self._test_decode(' ', '')

    def test_decode_multiple_spaces(self):
        self._test_decode(' ' * 5, '')

    def test_decode_one_line_trailing_spaces(self):
        self._test_decode('hello \r\n', 'hello\n')

    def test_decode_two_lines_trailing_spaces(self):
        self._test_decode('hello \r\nworld \r\n', 'hello\nworld\n')

    def test_decode_quoted_word(self):
        self._test_decode('=22quoted=20words=22', '"quoted words"')

    def test_decode_uppercase_quoting(self):
        self._test_decode('ab=CD=EF', 'ab\xcd\xef')

    def test_decode_lowercase_quoting(self):
        self._test_decode('ab=cd=ef', 'ab\xcd\xef')

    def test_decode_soft_line_break(self):
        self._test_decode('soft line=\r\nbreak', 'soft linebreak')

    def test_decode_false_quoting(self):
        # '=' not followed by two hex digits is passed through untouched.
        self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2')

    def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
        """Check body_encode() output and that no line exceeds maxlinelen.

        maxlinelen and eol are only forwarded when given, so that
        body_encode()'s own defaults are exercised otherwise.
        """
        kwargs = {}
        if maxlinelen is None:
            # Use body_encode's default.
            maxlinelen = 76
        else:
            kwargs['maxlinelen'] = maxlinelen
        if eol is None:
            # Use body_encode's default.
            eol = '\n'
        else:
            kwargs['eol'] = eol
        encoded_body = quoprimime.body_encode(body, **kwargs)
        self.assertEqual(encoded_body, expected_encoded_body)
        if eol == '\n' or eol == '\r\n':
            # We know how to split the result back into lines, so maxlinelen
            # can be checked.
            for line in encoded_body.splitlines():
                self.assertLessEqual(len(line), maxlinelen)

    def test_encode_null(self):
        self._test_encode('', '')

    def test_encode_null_lines(self):
        self._test_encode('\n\n', '\n\n')

    def test_encode_one_line(self):
        self._test_encode('hello\n', 'hello\n')

    def test_encode_one_line_crlf(self):
        self._test_encode('hello\r\n', 'hello\n')

    def test_encode_one_line_eol(self):
        self._test_encode('hello\n', 'hello\r\n', eol='\r\n')

    def test_encode_one_space(self):
        self._test_encode(' ', '=20')

    def test_encode_one_line_one_space(self):
        self._test_encode(' \n', '=20\n')

    # XXX: body_encode() expect strings, but uses ord(char) from these strings
    # to index into a 256-entry list.  For code points above 255, this will
    # fail.  Should there be a check for 8-bit only ord() values in body, or
    # at least a comment about the expected input?

    def test_encode_two_lines_one_space(self):
        self._test_encode(' \n \n', '=20\n=20\n')

    def test_encode_one_word_trailing_spaces(self):
        # Only the last whitespace character of a line is quoted; the
        # space before it is kept literally.
        self._test_encode('hello  ', 'hello =20')

    def test_encode_one_line_trailing_spaces(self):
        self._test_encode('hello  \n', 'hello =20\n')

    def test_encode_one_word_trailing_tab(self):
        self._test_encode('hello \t', 'hello =09')

    def test_encode_one_line_trailing_tab(self):
        self._test_encode('hello \t\n', 'hello =09\n')

    def test_encode_trailing_space_before_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6)

    def test_encode_trailing_space_at_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5)

    def test_encode_trailing_space_beyond_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4)

    def test_encode_whitespace_lines(self):
        self._test_encode(' \n' * 5, '=20\n' * 5)

    def test_encode_quoted_equals(self):
        self._test_encode('a = b', 'a =3D b')

    def test_encode_one_long_string(self):
        self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25)

    def test_encode_one_long_line(self):
        self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')

    def test_encode_one_very_long_line(self):
        self._test_encode('x' * 200 + '\n',
                2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n')

    def test_encode_shortest_maxlinelen(self):
        # 4 is the smallest usable width: one quoted triplet plus the
        # soft line break marker.
        self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4)

    def test_encode_maxlinelen_too_small(self):
        self.assertRaises(ValueError, self._test_encode, '', '', maxlinelen=3)

    def test_encode(self):
        eq = self.assertEqual
        eq(quoprimime.body_encode(''), '')
        eq(quoprimime.body_encode('hello'), 'hello')
        # CRLF in the input comes out as the default eol.
        eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
        # Test the maxlinelen arg
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
x xxxx xxxx xxxx xxxx=20""")
        # Test the eol argument
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
x xxxx xxxx xxxx xxxx=20""")
        eq(quoprimime.body_encode("""\
one line

two line"""), """\
one line

two line""")
4129
4130
Ezio Melottib3aedd42010-11-20 19:04:17 +00004131
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004132# Test the Charset class
class TestCharset(unittest.TestCase):
    """Tests for email.charset.Charset header and body encoding."""

    def tearDown(self):
        # test_body_encode registers a throwaway 'fake' charset; drop it
        # again (if present) so it cannot leak into other tests.
        from email import charset as CharsetModule
        CharsetModule.CHARSETS.pop('fake', None)

    def test_codec_encodeable(self):
        eq = self.assertEqual
        # Make sure us-ascii = no Unicode conversion
        ascii_cs = Charset('us-ascii')
        eq(ascii_cs.header_encode('Hello World!'), 'Hello World!')
        # Test 8-bit idempotency with us-ascii
        s = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
        self.assertRaises(UnicodeError, ascii_cs.header_encode, s)
        utf8_cs = Charset('utf-8')
        eq(utf8_cs.header_encode(s), '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')

    def test_body_encode(self):
        eq = self.assertEqual
        # Try a charset with QP body encoding
        qp_cs = Charset('iso-8859-1')
        eq('hello w=F6rld', qp_cs.body_encode('hello w\xf6rld'))
        # Try a charset with Base64 body encoding
        b64_cs = Charset('utf-8')
        eq('aGVsbG8gd29ybGQ=\n', b64_cs.body_encode(b'hello world'))
        # Try a charset with None body encoding
        plain_cs = Charset('us-ascii')
        eq('hello world', plain_cs.body_encode('hello world'))
        # Try the convert argument, where input codec != output codec
        c = Charset('euc-jp')
        # With apologies to Tokio Kikuchi ;)
        # XXX FIXME
##         try:
##             eq('\x1b$B5FCO;~IW\x1b(B',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
##             eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
##         except LookupError:
##             # We probably don't have the Japanese codecs installed
##             pass
        # Testing SF bug #625509, which we have to fake, since there are no
        # built-in encodings where the header encoding is QP but the body
        # encoding is not.
        from email import charset as CharsetModule
        CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8')
        fake_cs = Charset('fake')
        eq('hello world', fake_cs.body_encode('hello world'))

    def test_unicode_charset_name(self):
        # A plain ASCII name is accepted; a non-ASCII one is rejected.
        ascii_cs = Charset('us-ascii')
        self.assertEqual(str(ascii_cs), 'us-ascii')
        self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
4187
4188
Ezio Melottib3aedd42010-11-20 19:04:17 +00004189
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004190# Test multilingual MIME headers.
class TestHeader(TestEmailBase):
    """Tests for email.header.Header, decode_header() and make_header()."""

    def test_simple(self):
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        # The appended chunk's own leading space is kept in addition to the
        # space that separates the two chunks.
        h.append(' Goodbye World!')
        eq(h.encode(), 'Hello World!  Goodbye World!')

    def test_simple_surprise(self):
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        # Even without a leading space the chunks come out space-separated.
        h.append('Goodbye World!')
        eq(h.encode(), 'Hello World! Goodbye World!')

    def test_header_needs_no_decoding(self):
        h = 'no decoding needed'
        self.assertEqual(decode_header(h), [(h, None)])

    def test_long(self):
        h = Header(
            "I am the very model of a modern Major-General; I've "
            "information vegetable, animal, and mineral; I know the kings "
            "of England, and I quote the fights historical from Marathon "
            "to Waterloo, in order categorical; I'm very well acquainted, "
            "too, with matters mathematical; I understand equations, both "
            "the simple and quadratical; about binomial theorem I'm "
            "teeming with a lot o' news, with many cheerful facts about "
            "the square of the hypotenuse.",
            maxlinelen=76)
        # Every folded line must respect the requested maximum width.
        for line in h.encode(splitchars=' ').split('\n '):
            self.assertLessEqual(len(line), 76)

    def test_multilingual(self):
        eq = self.ndiffAssertEqual
        g = Charset("iso-8859-1")
        cz = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        g_head = (b'Die Mieter treten hier ein werden mit einem '
                  b'Foerderband komfortabel den Korridor entlang, '
                  b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
                  b'gegen die rotierenden Klingen bef\xf6rdert. ')
        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        h = Header(g_head, g)
        h.append(cz_head, cz)
        h.append(utf8_head, utf8)
        enc = h.encode(maxlinelen=76)
        eq(enc, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
        # Decoding must give back one chunk per original charset.
        decoded = decode_header(enc)
        eq(len(decoded), 3)
        eq(decoded[0], (g_head, 'iso-8859-1'))
        eq(decoded[1], (cz_head, 'iso-8859-2'))
        eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
        ustr = str(h)
        eq(ustr,
           (b'Die Mieter treten hier ein werden mit einem Foerderband '
            b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
            b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
            b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
            b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
            b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
            b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
            b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
            b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
            b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
            b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
            b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
            b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
            b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
            b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
            b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
            ).decode('utf-8'))
        # Test make_header()
        newh = make_header(decode_header(enc))
        eq(newh, h)

    def test_empty_header_encode(self):
        h = Header()
        self.assertEqual(h.encode(), '')

    def test_header_ctor_default_args(self):
        eq = self.ndiffAssertEqual
        h = Header()
        eq(h, '')
        h.append('foo', Charset('iso-8859-1'))
        eq(h, 'foo')

    def test_explicit_maxlinelen(self):
        eq = self.ndiffAssertEqual
        hstr = ('A very long line that must get split to something other '
                'than at the 76th character boundary to test the non-default '
                'behavior')
        h = Header(hstr)
        eq(h.encode(), '''\
A very long line that must get split to something other than at the 76th
 character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        # Naming the header leaves less room on the first line.
        h = Header(hstr, header_name='Subject')
        eq(h.encode(), '''\
A very long line that must get split to something other than at the
 76th character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        h = Header(hstr, maxlinelen=1024, header_name='Subject')
        eq(h.encode(), hstr)
        eq(str(h), hstr)

    def test_quopri_splittable(self):
        eq = self.ndiffAssertEqual
        h = Header(charset='iso-8859-1', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_?=""")
        # The folded form must decode back to the original text.
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='iso-8859-1', maxlinelen=40)
        h.append(x)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
 =?iso-8859-1?q?_xxxx_xxxx_?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_base64_splittable(self):
        eq = self.ndiffAssertEqual
        h = Header(charset='koi8-r', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IA==?=""")
        # The folded form must decode back to the original text.
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='koi8-r', maxlinelen=40)
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_us_ascii_header(self):
        eq = self.assertEqual
        s = 'hello'
        x = decode_header(s)
        eq(x, [('hello', None)])
        h = make_header(x)
        eq(s, h.encode())

    def test_string_charset(self):
        eq = self.assertEqual
        h = Header()
        # The charset may be given by name instead of as a Charset object.
        h.append('hello', 'iso-8859-1')
        eq(h, 'hello')

##    def test_unicode_error(self):
##        raises = self.assertRaises
##        raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
##        h = Header()
##        raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')

    def test_utf8_shortest(self):
        eq = self.assertEqual
        # Mostly-ASCII text is expected in the 'q' encoding ...
        h = Header('p\xf6stal', 'utf-8')
        eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
        # ... while text that is entirely non-ASCII is expected in 'b'.
        h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
        eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')

    def test_bad_8bit_header(self):
        raises = self.assertRaises
        eq = self.assertEqual
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        raises(UnicodeError, Header, x)
        h = Header()
        raises(UnicodeError, h.append, x)
        # With errors='replace' the undecodable byte is substituted instead.
        e = x.decode('utf-8', 'replace')
        eq(str(Header(x, errors='replace')), e)
        h.append(x, errors='replace')
        eq(str(h), e)

    def test_escaped_8bit_header(self):
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        e = x.decode('ascii', 'surrogateescape')
        h = Header(e, charset=email.charset.UNKNOWN8BIT)
        self.assertEqual(str(h),
                        'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])

    def test_header_handles_binary_unknown8bit(self):
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        h = Header(x, charset=email.charset.UNKNOWN8BIT)
        self.assertEqual(str(h),
                        'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])

    def test_make_header_handles_binary_unknown8bit(self):
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        h = Header(x, charset=email.charset.UNKNOWN8BIT)
        h2 = email.header.make_header(email.header.decode_header(h))
        self.assertEqual(str(h2),
                        'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h2), [(x, 'unknown-8bit')])

    def test_modify_returned_list_does_not_change_header(self):
        h = Header('test')
        chunks = email.header.decode_header(h)
        chunks.append(('ascii', 'test2'))
        self.assertEqual(str(h), 'test')

    def test_encoded_adjacent_nonencoded(self):
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        h.append('world')
        s = h.encode()
        eq(s, '=?iso-8859-1?q?hello?= world')
        h = make_header(decode_header(s))
        eq(h.encode(), s)

    def test_whitespace_keeper(self):
        eq = self.assertEqual
        s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
        parts = decode_header(s)
        # The two adjacent koi8-r encoded words are expected as one chunk.
        eq(parts, [(b'Subject: ', None),
                   (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 '
                    b'\xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'),
                   (b' zz.', None)])
        hdr = make_header(parts)
        eq(hdr.encode(),
           'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')

    def test_broken_base64_header(self):
        raises = self.assertRaises
        s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
        raises(errors.HeaderParseError, decode_header, s)

    def test_shift_jis_charset(self):
        # The expected encoded word is labelled iso-2022-jp, not shift_jis.
        h = Header('文', charset='shift_jis')
        self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')

    def test_flatten_header_with_no_value(self):
        # Issue 11401 (regression from email 4.x)  Note that the space after
        # the header doesn't reflect the input, but this is also the way
        # email 4.x behaved.  At some point it would be nice to fix that.
        msg = email.message_from_string("EmptyHeader:")
        self.assertEqual(str(msg), "EmptyHeader: \n\n")

    def test_encode_preserves_leading_ws_on_value(self):
        msg = Message()
        msg['SomeHeader'] = ' value with leading ws'
        # One space follows the colon, then the value's own leading space.
        self.assertEqual(str(msg), "SomeHeader:  value with leading ws\n\n")
4542
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004543
Ezio Melottib3aedd42010-11-20 19:04:17 +00004544
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004545# Test RFC 2231 header parameters (en/de)coding
class TestRFC2231(TestEmailBase):
    """Tests for RFC 2231 parameter values on Message objects.

    The tests cover get_param/set_param/del_param as well as get_filename,
    get_boundary and get_content_charset on headers whose parameters use the
    ``name*=`` / ``name*N*=`` extended and segmented forms.  The comments
    naming ``test_headerregistry`` tests point at the corresponding tests for
    the newer header machinery.
    """

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes
    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes
    def test_get_param(self):
        """get_param returns a (charset, language, value) triple."""
        eq = self.assertEqual
        msg = self._msgobj('msg_29.txt')
        eq(msg.get_param('title'),
           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        eq(msg.get_param('title', unquote=False),
           ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))

    def test_set_param(self):
        """set_param with a charset stores and emits an RFC 2231 value."""
        eq = self.ndiffAssertEqual
        msg = Message()
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii')
        eq(msg.get_param('title'),
           ('us-ascii', '', 'This is even more ***fun*** isn\'t it!'))
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        eq(msg.get_param('title'),
           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        msg = self._msgobj('msg_01.txt')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        # NOTE(review): runs of spaces inside the expected text below (for
        # example after "Fri,") may have been collapsed in transit; confirm
        # against msg_01.txt.
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    def test_set_param_requote(self):
        """requote=False drops optional quotes but tspecials stay quoted."""
        msg = Message()
        msg.set_param('title', 'foo')
        self.assertEqual(msg['content-type'], 'text/plain; title="foo"')
        msg.set_param('title', 'bar', requote=False)
        self.assertEqual(msg['content-type'], 'text/plain; title=bar')
        # tspecial is still quoted.
        msg.set_param('title', "(bar)bell", requote=False)
        self.assertEqual(msg['content-type'], 'text/plain; title="(bar)bell"')

    def test_del_param(self):
        """del_param removes one parameter and leaves the others in place."""
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        msg.set_param('foo', 'bar', charset='us-ascii', language='en')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        msg.del_param('foo', header='Content-Type')
        # NOTE(review): runs of spaces inside the expected text below (for
        # example after "Fri,") may have been collapsed in transit; confirm
        # against msg_01.txt.
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_charset
    # I changed the charset name, though, because the one in the file isn't
    # a legal charset name.  Should add a test for an illegal charset.
    def test_rfc2231_get_content_charset(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_32.txt')
        eq(msg.get_content_charset(), 'us-ascii')

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_double_quotes
    def test_rfc2231_parse_rfc_quoting(self):
        """Unquoted encoded segments decode and the message round-trips."""
        m = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*=''This%20is%20even%20more%20;
            \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        self.assertEqual(m, msg.as_string())

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes
    def test_rfc2231_parse_extra_quoting(self):
        """Double-quoted encoded segments decode and the message round-trips."""
        m = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*="''This%20is%20even%20more%20";
            \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        self.assertEqual(m, msg.as_string())

    # test_headerregistry.TestContentTypeHeader.rfc2231_no_language_or_charset
    # but new test uses *0* because otherwise lang/charset is not valid.
    # test_headerregistry.TestContentTypeHeader.rfc2231_segmented_normal_values
    def test_rfc2231_no_language_or_charset(self):
        m = '''\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm

'''
        msg = email.message_from_string(m)
        param = msg.get_param('NAME')
        # Plain (non-extended) segments must come back as a str, not a triple.
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(
            param,
            'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_charset
    def test_rfc2231_no_language_or_charset_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    # Duplicate of previous test?
    def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    # test_headerregistry.TestContentTypeHeader.rfc2231_partly_encoded,
    # but the test below is wrong (the first part should be decoded).
    def test_rfc2231_partly_encoded(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20***fun*** is it not.pdf')

    def test_rfc2231_partly_nonencoded(self):
        """Segments without the trailing '*' are not percent-decoded."""
        m = '''\
Content-Disposition: inline;
\tfilename*0="This%20is%20even%20more%20";
\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_boundary(self):
        m = '''\
Content-Type: multipart/alternative;
\tboundary*0*="''This%20is%20even%20more%20";
\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tboundary*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_boundary(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_charset(self):
        # This is a nonsensical charset value, but tests the code anyway
        m = '''\
Content-Type: text/plain;
\tcharset*0*="This%20is%20even%20more%20";
\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tcharset*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_content_charset(),
                         'this is even more ***fun*** is it not.pdf')

    # test_headerregistry.TestContentTypeHeader.rfc2231_unknown_charset_treated_as_ascii
    def test_rfc2231_bad_encoding_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_charset(self):
        m = """\
Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D

"""
        msg = email.message_from_string(m)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertIsNone(msg.get_content_charset())

    def test_rfc2231_bad_character_in_charset(self):
        m = """\
Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D

"""
        msg = email.message_from_string(m)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertIsNone(msg.get_content_charset())

    def test_rfc2231_bad_character_in_filename(self):
        """An undecodable byte in the filename becomes U+FFFD."""
        m = '''\
Content-Disposition: inline;
\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2*="is it not.pdf%E2"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf\ufffd')

    def test_rfc2231_unknown_encoding(self):
        m = """\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt

"""
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(), 'myfile.txt')

    def test_rfc2231_single_tick_in_filename_extended(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, None)
        eq(language, None)
        eq(s, "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes
    def test_rfc2231_single_tick_in_filename(self):
        m = """\
Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(m)
        param = msg.get_param('name')
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(param, "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_value_with_charset_and_lang
    def test_rfc2231_tick_attack_extended(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_non_encoded_value
    def test_rfc2231_tick_attack(self):
        m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(m)
        param = msg.get_param('name')
        # Non-extended segments: the charset'lang' prefix is just text.
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(param, "us-ascii'en-us'Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quotes_inside_quotes
    def test_rfc2231_no_extended_values(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo; name=\"Frank's Document\"

"""
        msg = email.message_from_string(m)
        eq(msg.get_param('name'), "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_then_unencoded_segments
    def test_rfc2231_encoded_then_unencoded_segments(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'My\";
\tname*1=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, 'My Document For You')

    # test_headerregistry.TestContentTypeHeader.rfc2231_unencoded_then_encoded_segments
    # test_headerregistry.TestContentTypeHeader.rfc2231_quoted_unencoded_then_encoded_segments
    def test_rfc2231_unencoded_then_encoded_segments(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'My\";
\tname*1*=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, 'My Document For You')
4913
4914
Ezio Melottib3aedd42010-11-20 19:04:17 +00004915
R. David Murraya8f480f2010-01-16 18:30:03 +00004916# Tests to ensure that signed parts of an email are completely preserved, as
4917# required by RFC1847 section 2.1. Note that these are incomplete, because the
4918# email package does not currently always preserve the body. See issue 1670765.
class TestSigned(TestEmailBase):
    """Check that the first MIME part of a signed message is left untouched.

    Each test parses 'msg_45.txt', regenerates the message text in a
    different way, and compares the first MIME part of the output with the
    first MIME part of the input.
    """

    def _msg_and_obj(self, filename):
        """Return (raw text, parsed Message) for the named test data file."""
        with openfile(filename) as fp:
            original = fp.read()
        msg = email.message_from_string(original)
        return original, msg

    def _signed_parts_eq(self, original, result):
        """Assert the first MIME part of *result* equals that of *original*.

        Fails with an explicit message when no boundary-delimited part can
        be found in either text, instead of erroring on a missing match.
        """
        # Extract the first mime part of each message: group 1 is the
        # boundary string, group 2 everything up to its next occurrence.
        repart = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)
        in_match = repart.search(original)
        out_match = repart.search(result)
        self.assertIsNotNone(in_match,
                             'no MIME part found in the original message')
        self.assertIsNotNone(out_match,
                             'no MIME part found in the generated message')
        inpart = in_match.group(2)
        outpart = out_match.group(2)
        self.assertEqual(outpart, inpart)

    def test_long_headers_as_string(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)
4951
4952
Ezio Melottib3aedd42010-11-20 19:04:17 +00004953
# Run every test in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()