blob: eaed26f844ff3da7e1af5c31965c32b13c62ab5b [file] [log] [blame]
Benjamin Peterson46a99002010-01-09 18:45:30 +00001# Copyright (C) 2001-2010 Python Software Foundation
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002# Contact: email-sig@python.org
3# email package unit tests
4
R. David Murray719a4492010-11-21 16:53:48 +00005import re
Guido van Rossum8b3febe2007-08-30 01:15:14 +00006import time
7import base64
Guido van Rossum8b3febe2007-08-30 01:15:14 +00008import unittest
R. David Murray96fd54e2010-10-08 15:55:28 +00009import textwrap
Guido van Rossum8b3febe2007-08-30 01:15:14 +000010
R. David Murray96fd54e2010-10-08 15:55:28 +000011from io import StringIO, BytesIO
Guido van Rossum8b3febe2007-08-30 01:15:14 +000012from itertools import chain
13
14import email
R David Murrayc27e5222012-05-25 15:01:48 -040015import email.policy
Guido van Rossum8b3febe2007-08-30 01:15:14 +000016
17from email.charset import Charset
18from email.header import Header, decode_header, make_header
19from email.parser import Parser, HeaderParser
R David Murray638d40b2012-08-24 11:14:13 -040020from email.generator import Generator, DecodedGenerator, BytesGenerator
Guido van Rossum8b3febe2007-08-30 01:15:14 +000021from email.message import Message
22from email.mime.application import MIMEApplication
23from email.mime.audio import MIMEAudio
24from email.mime.text import MIMEText
25from email.mime.image import MIMEImage
26from email.mime.base import MIMEBase
27from email.mime.message import MIMEMessage
28from email.mime.multipart import MIMEMultipart
29from email import utils
30from email import errors
31from email import encoders
32from email import iterators
33from email import base64mime
34from email import quoprimime
35
R David Murray965794e2013-03-07 18:16:47 -050036from test.support import unlink
R David Murraya256bac2011-03-31 12:20:23 -040037from test.test_email import openfile, TestEmailBase
Guido van Rossum8b3febe2007-08-30 01:15:14 +000038
R David Murray612528d2013-03-15 20:38:15 -040039# These imports are documented to work, but we are testing them using a
40# different path, so we import them here just to make sure they are importable.
41from email.parser import FeedParser, BytesFeedParser
42
Guido van Rossum8b3febe2007-08-30 01:15:14 +000043NL = '\n'
44EMPTYSTRING = ''
45SPACE = ' '
46
47
Guido van Rossum8b3febe2007-08-30 01:15:14 +000048# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    """Tests for the email.message.Message API.

    Covers header access, MIME parameter handling, charset and payload
    handling, and flattening via the generator classes.
    """

    def test_get_all(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        # The second argument is the value returned for a missing header.
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        eq(msg.get_charset(), None)
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        eq(msg.get_charset(), None)
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_get_charsets(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_make_boundary(self):
        msg = MIMEMultipart('form-data')
        # Note that when the boundary gets created is an implementation
        # detail and might change.
        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
        # Trigger creation of boundary
        msg.as_string()
        self.assertEqual(msg.items()[0][1][:33],
                         'multipart/form-data; boundary="==')
        # XXX: there ought to be tests of the uniqueness of the boundary, too.

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        with openfile('msg_46.txt') as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, True, 0)
        gen.flatten(msg, False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_byte_message_rfc822_only(self):
        # Make sure new bytes header parser also passes this.
        with openfile('msg_46.txt', 'rb') as fp:
            msgdata = fp.read()
        parser = email.parser.BytesHeaderParser()
        msg = parser.parsebytes(msgdata)
        out = BytesIO()
        gen = email.generator.BytesGenerator(out)
        gen.flatten(msg)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        eq(msg.get_payload(decode=True), None)
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            # Assigning a header appends rather than replaces, and lookups
            # return the first match.  Drop any previous value so that every
            # alias in the tuple is really the one being consulted.
            del msg['content-transfer-encoding']
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_get_payload_n_raises_on_non_multipart(self):
        msg = Message()
        self.assertRaises(TypeError, msg.get_payload, 1)

    def test_decoded_generator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        self.assertIn('from', msg)
        self.assertIn('From', msg)
        self.assertIn('FROM', msg)
        self.assertIn('to', msg)
        self.assertIn('To', msg)
        self.assertIn('TO', msg)

    def test_as_string(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        eq(text, str(msg))
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        # With unixfrom=True the first line is the envelope "From " line;
        # everything after it must match the plain flattening.
        self.assertTrue(lines[0].startswith('From '))
        eq(text, NL.join(lines[1:]))

    # test_headerregistry.TestContentTypeHeader.bad_params
    def test_bad_param(self):
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_filename(), None)

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_boundary(), None)

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        eq(msg.get_params(), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    # test_headerregistry.TestContentTypeHeader.spaces_around_param_equals
    def test_get_param_liberal(self):
        msg = Message()
        msg['Content-Type'] = ('Content-Type: Multipart/mixed; '
                               'boundary = "CPIMSSMTPC06p5f3tG"')
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        eq(msg.get_param('quuz', header='x-header'), None)
        eq(msg.get_param('quuz'), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    # test_headerregistry.TestContentTypeHeader.spaces_around_semis
    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    # test_headerregistry.TestContentTypeHeader.semis_inside_quotes
    def test_get_param_with_semis_in_quotes(self):
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    # test_headerregistry.TestContentTypeHeader.quotes_inside_rfc2231_value
    def test_get_param_with_quotes(self):
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        msg = email.message_from_string('Header: exists')
        self.assertIn('header', msg)
        self.assertIn('Header', msg)
        self.assertIn('HEADER', msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        # Re-adding the parameter places it after the existing ones.
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_del_param_on_nonexistent_header(self):
        # No assertion: the call simply must not raise.
        msg = Message()
        msg.del_param('filename', 'content-disposition')

    def test_del_nonexistent_param(self):
        msg = Message()
        msg.add_header('Content-Type', 'text/plain', charset='utf-8')
        existing_header = msg['Content-Type']
        msg.del_param('foobar', header='Content-Type')
        # Deleting a parameter that is not there must leave the header
        # exactly as it was before the call.
        self.assertEqual(msg['Content-Type'], existing_header)

    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        # With a duplicated header name only the first occurrence is replaced.
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    # test_defect_handling:test_invalid_chars_in_base64_payload
    def test_broken_base64_payload(self):
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         (b'\x03\x00\xe9\xd0\xfe\xff\xff.\x8b\xc0'
                          b'\xa1\x00p\xf6\xbf\xe9\x0f'))
        self.assertIsInstance(msg.defects[0],
                              errors.InvalidBase64CharactersDefect)

    def test_broken_unicode_payload(self):
        # This test improves coverage but is not a compliance test.
        # The behavior in this situation is currently undefined by the API.
        x = 'this is a br\xf6ken thing to do'
        msg = Message()
        msg['content-type'] = 'text/plain'
        msg['content-transfer-encoding'] = '8bit'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    def test_questionable_bytes_payload(self):
        # This test improves coverage but is not a compliance test,
        # since it involves poking inside the black box.
        x = 'this is a quéstionable thing to do'.encode('utf-8')
        msg = Message()
        msg['content-type'] = 'text/plain; charset="utf-8"'
        msg['content-transfer-encoding'] = '8bit'
        msg._payload = x
        self.assertEqual(msg.get_payload(decode=True), x)

    # Issue 1078919
    def test_ascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    def test_noascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer.ppt")
        self.assertEqual(
            'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
            msg['Content-Disposition'])

    def test_nonascii_add_header_via_triple(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fußballer.ppt'))
        self.assertEqual(
            'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
            msg['Content-Disposition'])

    def test_ascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="windows [filename].ppt")
        self.assertEqual(
            'attachment; filename="windows [filename].ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer [filename].ppt")
        self.assertEqual(
            "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
            msg['Content-Disposition'])

    def test_add_header_with_name_only_param(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'inline', foo_bar=None)
        self.assertEqual("inline; foo-bar", msg['Content-Disposition'])

    def test_add_header_with_no_value(self):
        msg = Message()
        msg.add_header('X-Status', None)
        self.assertEqual('', msg['X-Status'])

    # Issue 5871: reject an attempt to embed a header inside a header value
    # (header injection attack).
    def test_embeded_header_via_Header_rejected(self):
        msg = Message()
        msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_embeded_header_via_string_rejected(self):
        msg = Message()
        msg['Dummy'] = 'dummy\nX-Injected-Header: test'
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_unicode_header_defaults_to_utf8_encoding(self):
        # Issue 14291
        m = MIMEText('abc\n')
        m['Subject'] = 'É test'
        self.assertEqual(str(m), textwrap.dedent("""\
            Content-Type: text/plain; charset="us-ascii"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 7bit
            Subject: =?utf-8?q?=C3=89_test?=

            abc
            """))

    def test_unicode_body_defaults_to_utf8_encoding(self):
        # Issue 14291
        m = MIMEText('É testabc\n')
        self.assertEqual(str(m), textwrap.dedent("""\
            Content-Type: text/plain; charset="utf-8"
            MIME-Version: 1.0
            Content-Transfer-Encoding: base64

            w4kgdGVzdGFiYwo=
            """))
640
641
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000642# Test the email.encoders module
class TestEncoders(unittest.TestCase):
    """Tests of the Content-Transfer-Encoding applied to MIME payloads."""

    def test_EncodersEncode_base64(self):
        with openfile('PyBanner048.gif', 'rb') as fp:
            bindata = fp.read()
        mimed = email.mime.image.MIMEImage(bindata)
        base64ed = mimed.get_payload()
        # the transfer-encoded body lines should all be <=76 characters
        lines = base64ed.split('\n')
        self.assertLessEqual(max(len(x) for x in lines), 76)

    def test_encode_empty_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg['content-transfer-encoding'], '7bit')

    def test_default_cte(self):
        eq = self.assertEqual
        # 7bit data and the default us-ascii _charset
        msg = MIMEText('hello world')
        eq(msg['content-transfer-encoding'], '7bit')
        # Similar, but with 8bit data
        msg = MIMEText('hello \xf8 world')
        eq(msg['content-transfer-encoding'], 'base64')
        # And now with a different charset
        msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')

    def test_encode7or8bit(self):
        # Make sure a charset whose input character set is 8bit but
        # whose output character set is 7bit gets a transfer-encoding
        # of 7bit.
        eq = self.assertEqual
        msg = MIMEText('文', _charset='euc-jp')
        eq(msg['content-transfer-encoding'], '7bit')

    def test_qp_encode_latin1(self):
        msg = MIMEText('\xe1\xf6\n', 'text', 'ISO-8859-1')
        self.assertEqual(str(msg), textwrap.dedent("""\
            MIME-Version: 1.0
            Content-Type: text/text; charset="iso-8859-1"
            Content-Transfer-Encoding: quoted-printable

            =E1=F6
            """))

    def test_qp_encode_non_latin1(self):
        # Issue 16948
        msg = MIMEText('\u017c\n', 'text', 'ISO-8859-2')
        self.assertEqual(str(msg), textwrap.dedent("""\
            MIME-Version: 1.0
            Content-Type: text/text; charset="iso-8859-2"
            Content-Transfer-Encoding: quoted-printable

            =BF
            """))
700
Ezio Melottib3aedd42010-11-20 19:04:17 +0000701
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000702# Test long header wrapping
703class TestLongHeaders(TestEmailBase):
R David Murray01581ee2011-04-18 10:04:34 -0400704
705 maxDiff = None
706
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000707 def test_split_long_continuation(self):
708 eq = self.ndiffAssertEqual
709 msg = email.message_from_string("""\
710Subject: bug demonstration
711\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
712\tmore text
713
714test
715""")
716 sfp = StringIO()
717 g = Generator(sfp)
718 g.flatten(msg)
719 eq(sfp.getvalue(), """\
720Subject: bug demonstration
721\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
722\tmore text
723
724test
725""")
726
727 def test_another_long_almost_unsplittable_header(self):
728 eq = self.ndiffAssertEqual
729 hstr = """\
730bug demonstration
731\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
732\tmore text"""
733 h = Header(hstr, continuation_ws='\t')
734 eq(h.encode(), """\
735bug demonstration
736\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
737\tmore text""")
738 h = Header(hstr.replace('\t', ' '))
739 eq(h.encode(), """\
740bug demonstration
741 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
742 more text""")
743
744 def test_long_nonstring(self):
745 eq = self.ndiffAssertEqual
746 g = Charset("iso-8859-1")
747 cz = Charset("iso-8859-2")
748 utf8 = Charset("utf-8")
749 g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
750 b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
751 b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
752 b'bef\xf6rdert. ')
753 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
754 b'd\xf9vtipu.. ')
755 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
756 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
757 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
758 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
759 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
760 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
761 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
762 '\u3044\u307e\u3059\u3002')
763 h = Header(g_head, g, header_name='Subject')
764 h.append(cz_head, cz)
765 h.append(utf8_head, utf8)
766 msg = Message()
767 msg['Subject'] = h
768 sfp = StringIO()
769 g = Generator(sfp)
770 g.flatten(msg)
771 eq(sfp.getvalue(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000772Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
773 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
774 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
775 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
776 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
777 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
778 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
779 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
780 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
781 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
782 =?utf-8?b?44CC?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000783
784""")
Guido van Rossum9604e662007-08-30 03:46:43 +0000785 eq(h.encode(maxlinelen=76), """\
786=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
787 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
788 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
789 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
790 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
791 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
792 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
793 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
794 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
795 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
796 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000797
798 def test_long_header_encode(self):
799 eq = self.ndiffAssertEqual
800 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
801 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
802 header_name='X-Foobar-Spoink-Defrobnit')
803 eq(h.encode(), '''\
804wasnipoop; giraffes="very-long-necked-animals";
805 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
806
807 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
808 eq = self.ndiffAssertEqual
809 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
810 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
811 header_name='X-Foobar-Spoink-Defrobnit',
812 continuation_ws='\t')
813 eq(h.encode(), '''\
814wasnipoop; giraffes="very-long-necked-animals";
815 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
816
817 def test_long_header_encode_with_tab_continuation(self):
818 eq = self.ndiffAssertEqual
819 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
820 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
821 header_name='X-Foobar-Spoink-Defrobnit',
822 continuation_ws='\t')
823 eq(h.encode(), '''\
824wasnipoop; giraffes="very-long-necked-animals";
825\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
826
R David Murray3a6152f2011-03-14 21:13:03 -0400827 def test_header_encode_with_different_output_charset(self):
828 h = Header('文', 'euc-jp')
829 self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=")
830
831 def test_long_header_encode_with_different_output_charset(self):
832 h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4'
833 b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4'
834 b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4'
835 b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp')
836 res = """\
837=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?=
838 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?="""
839 self.assertEqual(h.encode(), res)
840
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000841 def test_header_splitter(self):
842 eq = self.ndiffAssertEqual
843 msg = MIMEText('')
844 # It'd be great if we could use add_header() here, but that doesn't
845 # guarantee an order of the parameters.
846 msg['X-Foobar-Spoink-Defrobnit'] = (
847 'wasnipoop; giraffes="very-long-necked-animals"; '
848 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
849 sfp = StringIO()
850 g = Generator(sfp)
851 g.flatten(msg)
852 eq(sfp.getvalue(), '''\
853Content-Type: text/plain; charset="us-ascii"
854MIME-Version: 1.0
855Content-Transfer-Encoding: 7bit
856X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
857 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
858
859''')
860
861 def test_no_semis_header_splitter(self):
862 eq = self.ndiffAssertEqual
863 msg = Message()
864 msg['From'] = 'test@dom.ain'
865 msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
866 msg.set_payload('Test')
867 sfp = StringIO()
868 g = Generator(sfp)
869 g.flatten(msg)
870 eq(sfp.getvalue(), """\
871From: test@dom.ain
872References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
873 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>
874
875Test""")
876
R David Murray7da4db12011-04-07 20:37:17 -0400877 def test_last_split_chunk_does_not_fit(self):
878 eq = self.ndiffAssertEqual
879 h = Header('Subject: the first part of this is short, but_the_second'
880 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
881 '_all_by_itself')
882 eq(h.encode(), """\
883Subject: the first part of this is short,
884 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
885
886 def test_splittable_leading_char_followed_by_overlong_unsplitable(self):
887 eq = self.ndiffAssertEqual
888 h = Header(', but_the_second'
889 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
890 '_all_by_itself')
891 eq(h.encode(), """\
892,
893 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
894
895 def test_multiple_splittable_leading_char_followed_by_overlong_unsplitable(self):
896 eq = self.ndiffAssertEqual
897 h = Header(', , but_the_second'
898 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
899 '_all_by_itself')
900 eq(h.encode(), """\
901, ,
902 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
903
904 def test_trailing_splitable_on_overlong_unsplitable(self):
905 eq = self.ndiffAssertEqual
906 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
907 'be_on_a_line_all_by_itself;')
908 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_should_"
909 "be_on_a_line_all_by_itself;")
910
911 def test_trailing_splitable_on_overlong_unsplitable_with_leading_splitable(self):
912 eq = self.ndiffAssertEqual
913 h = Header('; '
914 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
R David Murray01581ee2011-04-18 10:04:34 -0400915 'be_on_a_line_all_by_itself; ')
R David Murray7da4db12011-04-07 20:37:17 -0400916 eq(h.encode(), """\
917;
R David Murray01581ee2011-04-18 10:04:34 -0400918 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
R David Murray7da4db12011-04-07 20:37:17 -0400919
R David Murraye1292a22011-04-07 20:54:03 -0400920 def test_long_header_with_multiple_sequential_split_chars(self):
R David Murraye1292a22011-04-07 20:54:03 -0400921 eq = self.ndiffAssertEqual
922 h = Header('This is a long line that has two whitespaces in a row. '
923 'This used to cause truncation of the header when folded')
924 eq(h.encode(), """\
925This is a long line that has two whitespaces in a row. This used to cause
926 truncation of the header when folded""")
927
R David Murray01581ee2011-04-18 10:04:34 -0400928 def test_splitter_split_on_punctuation_only_if_fws(self):
929 eq = self.ndiffAssertEqual
930 h = Header('thisverylongheaderhas;semicolons;and,commas,but'
931 'they;arenotlegal;fold,points')
932 eq(h.encode(), "thisverylongheaderhas;semicolons;and,commas,butthey;"
933 "arenotlegal;fold,points")
934
935 def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self):
936 eq = self.ndiffAssertEqual
937 h = Header('this is a test where we need to have more than one line '
938 'before; our final line that is just too big to fit;; '
939 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
940 'be_on_a_line_all_by_itself;')
941 eq(h.encode(), """\
942this is a test where we need to have more than one line before;
943 our final line that is just too big to fit;;
944 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""")
945
946 def test_overlong_last_part_followed_by_split_point(self):
947 eq = self.ndiffAssertEqual
948 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
949 'be_on_a_line_all_by_itself ')
950 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_"
951 "should_be_on_a_line_all_by_itself ")
952
953 def test_multiline_with_overlong_parts_separated_by_two_split_points(self):
954 eq = self.ndiffAssertEqual
955 h = Header('this_is_a__test_where_we_need_to_have_more_than_one_line_'
956 'before_our_final_line_; ; '
957 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
958 'be_on_a_line_all_by_itself; ')
959 eq(h.encode(), """\
960this_is_a__test_where_we_need_to_have_more_than_one_line_before_our_final_line_;
961 ;
962 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
963
964 def test_multiline_with_overlong_last_part_followed_by_split_point(self):
965 eq = self.ndiffAssertEqual
966 h = Header('this is a test where we need to have more than one line '
967 'before our final line; ; '
968 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
969 'be_on_a_line_all_by_itself; ')
970 eq(h.encode(), """\
971this is a test where we need to have more than one line before our final line;
972 ;
973 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
974
975 def test_long_header_with_whitespace_runs(self):
976 eq = self.ndiffAssertEqual
977 msg = Message()
978 msg['From'] = 'test@dom.ain'
979 msg['References'] = SPACE.join(['<foo@dom.ain> '] * 10)
980 msg.set_payload('Test')
981 sfp = StringIO()
982 g = Generator(sfp)
983 g.flatten(msg)
984 eq(sfp.getvalue(), """\
985From: test@dom.ain
986References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
987 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
988 <foo@dom.ain> <foo@dom.ain>\x20\x20
989
990Test""")
991
992 def test_long_run_with_semi_header_splitter(self):
993 eq = self.ndiffAssertEqual
994 msg = Message()
995 msg['From'] = 'test@dom.ain'
996 msg['References'] = SPACE.join(['<foo@dom.ain>'] * 10) + '; abc'
997 msg.set_payload('Test')
998 sfp = StringIO()
999 g = Generator(sfp)
1000 g.flatten(msg)
1001 eq(sfp.getvalue(), """\
1002From: test@dom.ain
1003References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
1004 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
1005 <foo@dom.ain>; abc
1006
1007Test""")
1008
1009 def test_splitter_split_on_punctuation_only_if_fws(self):
1010 eq = self.ndiffAssertEqual
1011 msg = Message()
1012 msg['From'] = 'test@dom.ain'
1013 msg['References'] = ('thisverylongheaderhas;semicolons;and,commas,but'
1014 'they;arenotlegal;fold,points')
1015 msg.set_payload('Test')
1016 sfp = StringIO()
1017 g = Generator(sfp)
1018 g.flatten(msg)
1019 # XXX the space after the header should not be there.
1020 eq(sfp.getvalue(), """\
1021From: test@dom.ain
1022References:\x20
1023 thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points
1024
1025Test""")
1026
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001027 def test_no_split_long_header(self):
1028 eq = self.ndiffAssertEqual
1029 hstr = 'References: ' + 'x' * 80
Guido van Rossum9604e662007-08-30 03:46:43 +00001030 h = Header(hstr)
1031 # These come on two lines because Headers are really field value
1032 # classes and don't really know about their field names.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001033 eq(h.encode(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +00001034References:
1035 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
1036 h = Header('x' * 80)
1037 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001038
1039 def test_splitting_multiple_long_lines(self):
1040 eq = self.ndiffAssertEqual
1041 hstr = """\
1042from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1043\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1044\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1045"""
1046 h = Header(hstr, continuation_ws='\t')
1047 eq(h.encode(), """\
1048from babylon.socal-raves.org (localhost [127.0.0.1]);
1049 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1050 for <mailman-admin@babylon.socal-raves.org>;
1051 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1052\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1053 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1054 for <mailman-admin@babylon.socal-raves.org>;
1055 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1056\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1057 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1058 for <mailman-admin@babylon.socal-raves.org>;
1059 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")
1060
1061 def test_splitting_first_line_only_is_long(self):
1062 eq = self.ndiffAssertEqual
1063 hstr = """\
1064from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
1065\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1066\tid 17k4h5-00034i-00
1067\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
1068 h = Header(hstr, maxlinelen=78, header_name='Received',
1069 continuation_ws='\t')
1070 eq(h.encode(), """\
1071from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
1072 helo=cthulhu.gerg.ca)
1073\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1074\tid 17k4h5-00034i-00
1075\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")
1076
1077 def test_long_8bit_header(self):
1078 eq = self.ndiffAssertEqual
1079 msg = Message()
1080 h = Header('Britische Regierung gibt', 'iso-8859-1',
1081 header_name='Subject')
1082 h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
Guido van Rossum9604e662007-08-30 03:46:43 +00001083 eq(h.encode(maxlinelen=76), """\
1084=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1085 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001086 msg['Subject'] = h
Guido van Rossum9604e662007-08-30 03:46:43 +00001087 eq(msg.as_string(maxheaderlen=76), """\
1088Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1089 =?iso-8859-1?q?hore-Windkraftprojekte?=
1090
1091""")
1092 eq(msg.as_string(maxheaderlen=0), """\
1093Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001094
1095""")
1096
1097 def test_long_8bit_header_no_charset(self):
1098 eq = self.ndiffAssertEqual
1099 msg = Message()
Barry Warsaw8c571042007-08-30 19:17:18 +00001100 header_string = ('Britische Regierung gibt gr\xfcnes Licht '
1101 'f\xfcr Offshore-Windkraftprojekte '
1102 '<a-very-long-address@example.com>')
1103 msg['Reply-To'] = header_string
R David Murray7441a7a2012-03-14 02:59:51 -04001104 eq(msg.as_string(maxheaderlen=78), """\
1105Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1106 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
1107
1108""")
Barry Warsaw8c571042007-08-30 19:17:18 +00001109 msg = Message()
R David Murray7441a7a2012-03-14 02:59:51 -04001110 msg['Reply-To'] = Header(header_string,
Barry Warsaw8c571042007-08-30 19:17:18 +00001111 header_name='Reply-To')
1112 eq(msg.as_string(maxheaderlen=78), """\
1113Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1114 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001115
1116""")
1117
1118 def test_long_to_header(self):
1119 eq = self.ndiffAssertEqual
1120 to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
R David Murray01581ee2011-04-18 10:04:34 -04001121 '<someone@eecs.umich.edu>, '
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001122 '"Someone Test #B" <someone@umich.edu>, '
1123 '"Someone Test #C" <someone@eecs.umich.edu>, '
1124 '"Someone Test #D" <someone@eecs.umich.edu>')
1125 msg = Message()
1126 msg['To'] = to
1127 eq(msg.as_string(maxheaderlen=78), '''\
Guido van Rossum9604e662007-08-30 03:46:43 +00001128To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
Barry Warsaw70d61ce2009-03-30 23:12:30 +00001129 "Someone Test #B" <someone@umich.edu>,
Guido van Rossum9604e662007-08-30 03:46:43 +00001130 "Someone Test #C" <someone@eecs.umich.edu>,
1131 "Someone Test #D" <someone@eecs.umich.edu>
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001132
1133''')
1134
1135 def test_long_line_after_append(self):
1136 eq = self.ndiffAssertEqual
1137 s = 'This is an example of string which has almost the limit of header length.'
1138 h = Header(s)
1139 h.append('Add another line.')
Guido van Rossum9604e662007-08-30 03:46:43 +00001140 eq(h.encode(maxlinelen=76), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001141This is an example of string which has almost the limit of header length.
1142 Add another line.""")
1143
1144 def test_shorter_line_with_append(self):
1145 eq = self.ndiffAssertEqual
1146 s = 'This is a shorter line.'
1147 h = Header(s)
1148 h.append('Add another sentence. (Surprise?)')
1149 eq(h.encode(),
1150 'This is a shorter line. Add another sentence. (Surprise?)')
1151
1152 def test_long_field_name(self):
1153 eq = self.ndiffAssertEqual
1154 fn = 'X-Very-Very-Very-Long-Header-Name'
Guido van Rossum9604e662007-08-30 03:46:43 +00001155 gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
1156 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
1157 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
1158 'bef\xf6rdert. ')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001159 h = Header(gs, 'iso-8859-1', header_name=fn)
1160 # BAW: this seems broken because the first line is too long
Guido van Rossum9604e662007-08-30 03:46:43 +00001161 eq(h.encode(maxlinelen=76), """\
1162=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
1163 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
1164 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
1165 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001166
1167 def test_long_received_header(self):
1168 h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
1169 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
1170 'Wed, 05 Mar 2003 18:10:18 -0700')
1171 msg = Message()
1172 msg['Received-1'] = Header(h, continuation_ws='\t')
1173 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +00001174 # This should be splitting on spaces not semicolons.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001175 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001176Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1177 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
Barry Warsawbef9d212007-08-31 10:55:37 +00001178 Wed, 05 Mar 2003 18:10:18 -0700
R David Murray01581ee2011-04-18 10:04:34 -04001179Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1180 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
Barry Warsawbef9d212007-08-31 10:55:37 +00001181 Wed, 05 Mar 2003 18:10:18 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001182
1183""")
1184
1185 def test_string_headerinst_eq(self):
1186 h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
1187 'tu-muenchen.de> (David Bremner\'s message of '
1188 '"Thu, 6 Mar 2003 13:58:21 +0100")')
1189 msg = Message()
1190 msg['Received-1'] = Header(h, header_name='Received-1',
1191 continuation_ws='\t')
1192 msg['Received-2'] = h
R David Murray01581ee2011-04-18 10:04:34 -04001193 # XXX The space after the ':' should not be there.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001194 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001195Received-1:\x20
1196 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1197 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
1198Received-2:\x20
1199 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1200 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001201
1202""")
1203
1204 def test_long_unbreakable_lines_with_continuation(self):
1205 eq = self.ndiffAssertEqual
1206 msg = Message()
1207 t = """\
1208iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1209 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
1210 msg['Face-1'] = t
1211 msg['Face-2'] = Header(t, header_name='Face-2')
R David Murray01581ee2011-04-18 10:04:34 -04001212 msg['Face-3'] = ' ' + t
Barry Warsawbef9d212007-08-31 10:55:37 +00001213 # XXX This splitting is all wrong. It the first value line should be
R David Murray01581ee2011-04-18 10:04:34 -04001214 # snug against the field name or the space after the header not there.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001215 eq(msg.as_string(maxheaderlen=78), """\
Barry Warsawc5a6a302007-08-31 11:19:21 +00001216Face-1:\x20
Barry Warsaw70d61ce2009-03-30 23:12:30 +00001217 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001218 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Barry Warsawc5a6a302007-08-31 11:19:21 +00001219Face-2:\x20
Barry Warsawbef9d212007-08-31 10:55:37 +00001220 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001221 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
R David Murray01581ee2011-04-18 10:04:34 -04001222Face-3:\x20
1223 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1224 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001225
1226""")
1227
1228 def test_another_long_multiline_header(self):
1229 eq = self.ndiffAssertEqual
1230 m = ('Received: from siimage.com '
1231 '([172.25.1.3]) by zima.siliconimage.com with '
Guido van Rossum9604e662007-08-30 03:46:43 +00001232 'Microsoft SMTPSVC(5.0.2195.4905); '
1233 'Wed, 16 Oct 2002 07:41:11 -0700')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001234 msg = email.message_from_string(m)
1235 eq(msg.as_string(maxheaderlen=78), '''\
R David Murray01581ee2011-04-18 10:04:34 -04001236Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with
1237 Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 2002 07:41:11 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001238
1239''')
1240
1241 def test_long_lines_with_different_header(self):
1242 eq = self.ndiffAssertEqual
1243 h = ('List-Unsubscribe: '
1244 '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
1245 ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
1246 '?subject=unsubscribe>')
1247 msg = Message()
1248 msg['List'] = h
1249 msg['List'] = Header(h, header_name='List')
1250 eq(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001251List: List-Unsubscribe:
1252 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +00001253 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
R David Murray01581ee2011-04-18 10:04:34 -04001254List: List-Unsubscribe:
1255 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +00001256 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001257
1258""")
1259
R. David Murray6f0022d2011-01-07 21:57:25 +00001260 def test_long_rfc2047_header_with_embedded_fws(self):
1261 h = Header(textwrap.dedent("""\
1262 We're going to pretend this header is in a non-ascii character set
1263 \tto see if line wrapping with encoded words and embedded
1264 folding white space works"""),
1265 charset='utf-8',
1266 header_name='Test')
1267 self.assertEqual(h.encode()+'\n', textwrap.dedent("""\
1268 =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
1269 =?utf-8?q?cter_set?=
1270 =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
1271 =?utf-8?q?_folding_white_space_works?=""")+'\n')
1272
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001273
Ezio Melottib3aedd42010-11-20 19:04:17 +00001274
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001275# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    def setUp(self):
        # A one-header message whose body opens with a "From " line.
        message = Message()
        message['From'] = 'aaa@bbb.org'
        message.set_payload('From the desk of A.A.A.:\n'
                            'Blah blah blah\n')
        self.msg = message

    def test_mangled_from(self):
        # With mangle_from_ on, the body's "From " line gains a '>'.
        out = StringIO()
        Generator(out, mangle_from_=True).flatten(self.msg)
        expected = (
            'From: aaa@bbb.org\n'
            '\n'
            '>From the desk of A.A.A.:\n'
            'Blah blah blah\n'
        )
        self.assertEqual(out.getvalue(), expected)

    def test_dont_mangle_from(self):
        # With mangle_from_ off, the body is written unchanged.
        out = StringIO()
        Generator(out, mangle_from_=False).flatten(self.msg)
        expected = (
            'From: aaa@bbb.org\n'
            '\n'
            'From the desk of A.A.A.:\n'
            'Blah blah blah\n'
        )
        self.assertEqual(out.getvalue(), expected)

    def test_mangle_from_in_preamble_and_epilog(self):
        # A multipart message with one "From " line before the first
        # boundary and one after the closing boundary: both get mangled.
        source = '\n'.join([
            'From: foo@bar.com',
            'Mime-Version: 1.0',
            'Content-Type: multipart/mixed; boundary=XXX',
            '',
            'From somewhere unknown',
            '',
            '--XXX',
            'Content-Type: text/plain',
            '',
            'foo',
            '',
            '--XXX--',
            '',
            'From somewhere unknowable',
            '',
        ])
        out = StringIO()
        Generator(out, mangle_from_=True).flatten(
            email.message_from_string(source))
        mangled = [line for line in out.getvalue().split('\n')
                   if line.startswith('>From ')]
        self.assertEqual(len(mangled), 2)

    def test_mangled_from_with_bad_bytes(self):
        # Mangling through BytesGenerator when the "From " line of an
        # 8bit body contains non-ASCII bytes.
        headers = (
            b'Content-Type: text/plain; charset="utf-8"\n'
            b'MIME-Version: 1.0\n'
            b'Content-Transfer-Encoding: 8bit\n'
            b'From: aaa@bbb.org\n'
            b'\n'
        )
        message = email.message_from_bytes(headers + b'From R\xc3\xb6lli\n')
        out = BytesIO()
        BytesGenerator(out, mangle_from_=True).flatten(message)
        self.assertEqual(out.getvalue(), headers + b'>From R\xc3\xb6lli\n')
1343
Ezio Melottib3aedd42010-11-20 19:04:17 +00001344
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001345# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    def setUp(self):
        # Read the sample audio file once per test and wrap it in a
        # MIMEAudio part, letting the class work out the subtype.
        with openfile('audiotest.au', 'rb') as fp:
            self._audiodata = fp.read()
        self._au = MIMEAudio(self._audiodata)

    def test_guess_minor_type(self):
        self.assertEqual(self._au.get_content_type(), 'audio/basic')

    def test_encoding(self):
        # The payload is base64 text that decodes back to the file bytes.
        payload = self._au.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._audiodata)

    def test_checkSetMinor(self):
        # An explicit subtype is used as given.
        au = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(au.get_content_type(), 'audio/fish')

    def test_add_header(self):
        eq = self.assertEqual
        self._au.add_header('Content-Disposition', 'attachment',
                            filename='audiotest.au')
        eq(self._au['content-disposition'],
           'attachment; filename="audiotest.au"')
        eq(self._au.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'audiotest.au')])
        eq(self._au.get_param('filename', header='content-disposition'),
           'audiotest.au')
        # A fresh list is a sentinel no real parameter value can be
        # identical to; assertIs reports both objects on failure.
        missing = []
        eq(self._au.get_param('attachment', header='content-disposition'), '')
        self.assertIs(self._au.get_param('foo', failobj=missing,
                                         header='content-disposition'),
                      missing)
        # Try some missing stuff
        self.assertIs(self._au.get_param('foobar', missing), missing)
        self.assertIs(self._au.get_param('attachment', missing,
                                         header='foobar'),
                      missing)
1383
1384
Ezio Melottib3aedd42010-11-20 19:04:17 +00001385
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001386# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    def setUp(self):
        # Read the sample image file once per test and wrap it in a
        # MIMEImage part, letting the class work out the subtype.
        with openfile('PyBanner048.gif', 'rb') as fp:
            self._imgdata = fp.read()
        self._im = MIMEImage(self._imgdata)

    def test_guess_minor_type(self):
        self.assertEqual(self._im.get_content_type(), 'image/gif')

    def test_encoding(self):
        # The payload is base64 text that decodes back to the file bytes.
        payload = self._im.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._imgdata)

    def test_checkSetMinor(self):
        # An explicit subtype is used as given.
        im = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(im.get_content_type(), 'image/fish')

    def test_add_header(self):
        eq = self.assertEqual
        self._im.add_header('Content-Disposition', 'attachment',
                            filename='dingusfish.gif')
        eq(self._im['content-disposition'],
           'attachment; filename="dingusfish.gif"')
        eq(self._im.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'dingusfish.gif')])
        eq(self._im.get_param('filename', header='content-disposition'),
           'dingusfish.gif')
        # A fresh list is a sentinel no real parameter value can be
        # identical to; assertIs reports both objects on failure.
        missing = []
        eq(self._im.get_param('attachment', header='content-disposition'), '')
        self.assertIs(self._im.get_param('foo', failobj=missing,
                                         header='content-disposition'),
                      missing)
        # Try some missing stuff
        self.assertIs(self._im.get_param('foobar', missing), missing)
        self.assertIs(self._im.get_param('attachment', missing,
                                         header='foobar'),
                      missing)
1424
1425
Ezio Melottib3aedd42010-11-20 19:04:17 +00001426
# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    """Checks on MIMEApplication headers, payload encoding and round trips.

    All tests use the same six non-ASCII bytes as the application data.
    """

    def test_headers(self):
        eq = self.assertEqual
        msg = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        eq(msg.get_content_type(), 'application/octet-stream')
        eq(msg['content-transfer-encoding'], 'base64')

    def test_body(self):
        eq = self.assertEqual
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata)
        # whitespace in the cte encoded block is RFC-irrelevant.
        eq(msg.get_payload().strip(), '+vv8/f7/')
        eq(msg.get_payload(decode=True), bytesdata)

    def test_binary_body_with_encode_7or8bit(self):
        # Issue 17171.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_7or8bit)
        # Treated as a string, this will be invalid code points.
        self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        self.assertEqual(msg['Content-Transfer-Encoding'], '8bit')
        # Flatten to bytes and parse the result back in.
        s = BytesIO()
        g = BytesGenerator(s)
        g.flatten(msg)
        wireform = s.getvalue()
        msg2 = email.message_from_bytes(wireform)
        # Check the re-parsed message, not the original one again.
        self.assertEqual(msg2.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
        self.assertEqual(msg2['Content-Transfer-Encoding'], '8bit')

    def test_binary_body_with_encode_noop(self):
        # Issue 16564: This does not produce an RFC valid message, since to be
        # valid it should have a CTE of binary.  But the below works in
        # Python2, and is documented as working this way.
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(bytesdata, _encoder=encoders.encode_noop)
        # Treated as a string, this will be invalid code points.
        self.assertEqual(msg.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg.get_payload(decode=True), bytesdata)
        # Flatten to bytes and parse the result back in.
        s = BytesIO()
        g = BytesGenerator(s)
        g.flatten(msg)
        wireform = s.getvalue()
        msg2 = email.message_from_bytes(wireform)
        # Check the re-parsed message, not the original one again.
        self.assertEqual(msg2.get_payload(), '\uFFFD' * len(bytesdata))
        self.assertEqual(msg2.get_payload(decode=True), bytesdata)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001476
Ezio Melottib3aedd42010-11-20 19:04:17 +00001477
# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    """Checks on MIMEText: content type, charset handling and payload."""

    def setUp(self):
        self._msg = MIMEText('hello there')

    def test_types(self):
        eq = self.assertEqual
        eq(self._msg.get_content_type(), 'text/plain')
        eq(self._msg.get_param('charset'), 'us-ascii')
        # A fresh list is used as the sentinel so the identity checks
        # below can only succeed if this exact object is handed back.
        missing = []
        self.assertIs(self._msg.get_param('foobar', missing), missing)
        self.assertIs(self._msg.get_param('charset', missing,
                                          header='foobar'),
                      missing)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        # Same checks as test_charset; kept as a separately named test.
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there')
        eq(msg.get_charset(), 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        self.assertIn('hello there', msg.as_string())

    def test_utf8_input(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        eq = self.assertEqual
        msg = MIMEText(teststr, _charset='utf-8')
        eq(msg.get_charset().output_charset, 'utf-8')
        eq(msg['content-type'], 'text/plain; charset="utf-8"')
        eq(msg.get_payload(decode=True), teststr.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
        "will fix in email6")
    def test_utf8_input_no_charset(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
1529
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001530
Ezio Melottib3aedd42010-11-20 19:04:17 +00001531
# Test complicated multipart/* messages
class TestMultipart(TestEmailBase):
    """Build, flatten and parse multipart/* messages.

    setUp() assembles a multipart/mixed container holding one text part
    and one image part.  The other tests either inspect that container or
    compare generator output and parser results with literal messages.
    """

    def setUp(self):
        with openfile('PyBanner048.gif', 'rb') as fp:
            data = fp.read()
        container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
        image = MIMEImage(data, name='dingusfish.gif')
        image.add_header('content-disposition', 'attachment',
                         filename='dingusfish.gif')
        intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
        container.attach(intro)
        container.attach(image)
        container['From'] = 'Barry <barry@digicool.com>'
        container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
        container['Subject'] = 'Here is your dingus fish'

        now = 987809702.54848599
        timetuple = time.localtime(now)
        # The last field is the DST flag; pick the matching offset.
        if timetuple[-1] == 0:
            tzsecs = time.timezone
        else:
            tzsecs = time.altzone
        # time.timezone/altzone count seconds *west* of UTC, so a positive
        # value corresponds to a negative numeric zone.
        sign = '-' if tzsecs > 0 else '+'
        # Format the magnitude as HHMM.  Working on abs() keeps a second
        # minus sign out of the result for zones east of UTC, and divmod
        # keeps the minutes right for offsets that are not whole hours.
        hours, minutes = divmod(abs(tzsecs) // 60, 60)
        tzoffset = ' %s%02d%02d' % (sign, hours, minutes)
        container['Date'] = time.strftime(
            '%a, %d %b %Y %H:%M:%S', timetuple) + tzoffset
        self._msg = container
        self._im = image
        self._txt = intro

    def test_hierarchy(self):
        # convenience
        eq = self.assertEqual
        raises = self.assertRaises
        # tests
        m = self._msg
        self.assertTrue(m.is_multipart())
        eq(m.get_content_type(), 'multipart/mixed')
        eq(len(m.get_payload()), 2)
        raises(IndexError, m.get_payload, 2)
        m0 = m.get_payload(0)
        m1 = m.get_payload(1)
        # The parts must be the very objects attached in setUp().
        self.assertIs(m0, self._txt)
        self.assertIs(m1, self._im)
        eq(m.get_payload(), [m0, m1])
        self.assertFalse(m0.is_multipart())
        self.assertFalse(m1.is_multipart())

    def test_empty_multipart_idempotent(self):
        text = """\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY


--BOUNDARY--
"""
        msg = Parser().parsestr(text)
        self.ndiffAssertEqual(text, msg.as_string())

    def test_no_parts_in_a_multipart_with_none_epilogue(self):
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY

--BOUNDARY--''')

    def test_no_parts_in_a_multipart_with_empty_epilogue(self):
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = ''
        outer.epilogue = ''
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY

--BOUNDARY--
''')

    def test_one_part_in_a_multipart(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.set_boundary('BOUNDARY')
        msg = MIMEText('hello world')
        outer.attach(msg)
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_preamble(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')


    def test_seq_parts_in_a_multipart_with_none_preamble(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')


    def test_seq_parts_in_a_multipart_with_none_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')


    def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')


    def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = '\n'
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--

''')

    def test_message_external_body(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_36.txt')
        eq(len(msg.get_payload()), 2)
        msg1 = msg.get_payload(1)
        eq(msg1.get_content_type(), 'multipart/alternative')
        eq(len(msg1.get_payload()), 2)
        for subpart in msg1.get_payload():
            eq(subpart.get_content_type(), 'message/external-body')
            eq(len(subpart.get_payload()), 1)
            subsubpart = subpart.get_payload(0)
            eq(subsubpart.get_content_type(), 'text/plain')

    def test_double_boundary(self):
        # msg_37.txt is a multipart that contains two dash-boundary's in a
        # row.  Our interpretation of RFC 2046 calls for ignoring the second
        # and subsequent boundaries.
        msg = self._msgobj('msg_37.txt')
        self.assertEqual(len(msg.get_payload()), 3)

    def test_nested_inner_contains_outer_boundary(self):
        eq = self.ndiffAssertEqual
        # msg_38.txt has an inner part that contains outer boundaries.  My
        # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
        # these are illegal and should be interpreted as unterminated inner
        # parts.
        msg = self._msgobj('msg_38.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): every nested entry below carries the same one-space
        # indent in this copy, which hides the nesting depth -- confirm the
        # expected text against the real _structure() output for msg_38.txt.
        eq(sfp.getvalue(), """\
multipart/mixed
 multipart/mixed
 multipart/alternative
 text/plain
 text/plain
 text/plain
 text/plain
""")

    def test_nested_with_same_boundary(self):
        eq = self.ndiffAssertEqual
        # msg 39.txt is similarly evil in that it's got inner parts that use
        # the same boundary as outer parts.  Again, I believe the way this is
        # parsed is closest to the spirit of RFC 2046
        msg = self._msgobj('msg_39.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): every nested entry below carries the same one-space
        # indent in this copy, which hides the nesting depth -- confirm the
        # expected text against the real _structure() output for msg_39.txt.
        eq(sfp.getvalue(), """\
multipart/mixed
 multipart/mixed
 multipart/alternative
 application/octet-stream
 application/octet-stream
 text/plain
""")

    def test_boundary_in_non_multipart(self):
        msg = self._msgobj('msg_40.txt')
        self.assertEqual(msg.as_string(), '''\
MIME-Version: 1.0
Content-Type: text/html; boundary="--961284236552522269"

----961284236552522269
Content-Type: text/html;
Content-Transfer-Encoding: 7Bit

<html></html>

----961284236552522269--
''')

    def test_boundary_with_leading_space(self):
        eq = self.assertEqual
        msg = email.message_from_string('''\
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary=" XXXX"

-- XXXX
Content-Type: text/plain


-- XXXX
Content-Type: text/plain

-- XXXX--
''')
        self.assertTrue(msg.is_multipart())
        eq(msg.get_boundary(), ' XXXX')
        eq(len(msg.get_payload()), 2)

    def test_boundary_without_trailing_newline(self):
        m = Parser().parsestr("""\
Content-Type: multipart/mixed; boundary="===============0012394164=="
MIME-Version: 1.0

--===============0012394164==
Content-Type: image/file1.jpg
MIME-Version: 1.0
Content-Transfer-Encoding: base64

YXNkZg==
--===============0012394164==--""")
        self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001904
1905
Ezio Melottib3aedd42010-11-20 19:04:17 +00001906
# Test some badly formatted messages
class TestNonConformant(TestEmailBase):
    """Parsing of malformed messages and the defects recorded for them."""

    def test_parse_missing_minor_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_14.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')

    # test_defect_handling
    def test_same_boundary_inner_outer(self):
        msg = self._msgobj('msg_15.txt')
        # XXX We can probably eventually do better
        inner = msg.get_payload(0)
        self.assertTrue(hasattr(inner, 'defects'))
        self.assertEqual(len(inner.defects), 1)
        self.assertIsInstance(inner.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    # test_defect_handling
    def test_multipart_no_boundary(self):
        msg = self._msgobj('msg_25.txt')
        self.assertIsInstance(msg.get_payload(), str)
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    # Template shared by the three CTE tests below.  The {} placeholder
    # sits at the end of the header block, so formatting in a string that
    # starts with a newline adds one more header line.  The boundary line
    # is indented further than the rest so that, after dedent, it is still
    # a folded continuation of the Content-Type header.
    multipart_msg = textwrap.dedent("""\
        Date: Wed, 14 Nov 2007 12:56:23 GMT
        From: foo@bar.invalid
        To: foo@bar.invalid
        Subject: Content-Transfer-Encoding: base64 and multipart
        MIME-Version: 1.0
        Content-Type: multipart/mixed;
          boundary="===============3344438784458119861=="{}

        --===============3344438784458119861==
        Content-Type: text/plain

        Test message

        --===============3344438784458119861==
        Content-Type: application/octet-stream
        Content-Transfer-Encoding: base64

        YWJj

        --===============3344438784458119861==--
        """)

    # test_defect_handling
    def test_multipart_invalid_cte(self):
        msg = self._str_msg(
            self.multipart_msg.format("\nContent-Transfer-Encoding: base64"))
        self.assertEqual(len(msg.defects), 1)
        self.assertIsInstance(msg.defects[0],
            errors.InvalidMultipartContentTransferEncodingDefect)

    # test_defect_handling
    def test_multipart_no_cte_no_defect(self):
        msg = self._str_msg(self.multipart_msg.format(''))
        self.assertEqual(len(msg.defects), 0)

    # test_defect_handling
    def test_multipart_valid_cte_no_defect(self):
        for cte in ('7bit', '8bit', 'BINary'):
            msg = self._str_msg(
                self.multipart_msg.format(
                    "\nContent-Transfer-Encoding: {}".format(cte)))
            self.assertEqual(len(msg.defects), 0)

    # test_headerregistry.TestContentTypeHeader invalid_1 and invalid_2.
    def test_invalid_content_type(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        msg = Message()
        # RFC 2045, $5.2 says invalid yields text/plain
        msg['Content-Type'] = 'text'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Clear the old value and try something /really/ invalid
        del msg['content-type']
        msg['Content-Type'] = 'foo'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Still, make sure that the message is idempotently generated
        s = StringIO()
        g = Generator(s)
        g.flatten(msg)
        neq(s.getvalue(), 'Content-Type: foo\n\n')

    def test_no_start_boundary(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_31.txt')
        eq(msg.get_payload(), """\
--BOUNDARY
Content-Type: text/plain

message 1

--BOUNDARY
Content-Type: text/plain

message 2

--BOUNDARY--
""")

    def test_no_separating_blank_line(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_35.txt')
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: here's something interesting

counter to RFC 2822, there's no separating newline here
""")

    # test_defect_handling
    def test_lying_multipart(self):
        msg = self._msgobj('msg_41.txt')
        self.assertTrue(hasattr(msg, 'defects'))
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    # test_defect_handling
    def test_missing_start_boundary(self):
        outer = self._msgobj('msg_42.txt')
        # The message structure is:
        #
        # multipart/mixed
        #    text/plain
        #    message/rfc822
        #        multipart/mixed [*]
        #
        # [*] This message is missing its start boundary
        bad = outer.get_payload(1).get_payload(0)
        self.assertEqual(len(bad.defects), 1)
        self.assertIsInstance(bad.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    # test_defect_handling
    def test_first_line_is_continuation_header(self):
        eq = self.assertEqual
        m = ' Line 1\nSubject: test\n\nbody'
        msg = email.message_from_string(m)
        eq(msg.keys(), ['Subject'])
        eq(msg.get_payload(), 'body')
        eq(len(msg.defects), 1)
        self.assertDefectsEqual(msg.defects,
                                 [errors.FirstHeaderLineIsContinuationDefect])
        # The defect keeps the offending line verbatim.
        eq(msg.defects[0].line, ' Line 1\n')

    # test_defect_handling
    def test_missing_header_body_separator(self):
        # Our heuristic if we see a line that doesn't look like a header (no
        # leading whitespace but no ':') is to assume that the blank line that
        # separates the header from the body is missing, and to stop parsing
        # headers and start parsing the body.
        msg = self._str_msg('Subject: test\nnot a header\nTo: abc\n\nb\n')
        self.assertEqual(msg.keys(), ['Subject'])
        self.assertEqual(msg.get_payload(), 'not a header\nTo: abc\n\nb\n')
        self.assertDefectsEqual(msg.defects,
                                [errors.MissingHeaderBodySeparatorDefect])
2083
Ezio Melottib3aedd42010-11-20 19:04:17 +00002084
# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    """decode_header() / make_header() behaviour on RFC 2047 encoded words."""

    def test_rfc2047_multiline(self):
        raw = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
        chunks = decode_header(raw)
        expected = [
            (b'Re: ', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            (b' baz foo bar ', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
        ]
        self.assertEqual(chunks, expected)
        header = make_header(chunks)
        self.assertEqual(
            str(header),
            'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        # Re-encoding folds the header to stay within maxlinelen.
        self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""")

    def test_whitespace_keeper_unicode(self):
        raw = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        chunks = decode_header(raw)
        self.assertEqual(chunks, [(b'Andr\xe9', 'iso-8859-1'),
                                  (b' Pirard <pirard@dom.ain>', None)])
        self.assertEqual(str(make_header(chunks)),
                         'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_keeper_unicode_2(self):
        raw = ('The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the '
               '=?iso-8859-1?b?bGF6eSBkb2c=?=')
        chunks = decode_header(raw)
        self.assertEqual(chunks, [
            (b'The ', None),
            (b'quick brown fox', 'iso-8859-1'),
            (b' jumped over the ', None),
            (b'lazy dog', 'iso-8859-1'),
        ])
        self.assertEqual(str(make_header(chunks)),
                         'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        chunks = decode_header(
            'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord')
        self.assertEqual(chunks, [
            (b'Sm', None),
            (b'\xf6', 'iso-8859-1'),
            (b'rg', None),
            (b'\xe5', 'iso-8859-1'),
            (b'sbord', None),
        ])

    def test_rfc2047_with_whitespace(self):
        chunks = decode_header(
            'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord')
        self.assertEqual(chunks, [
            (b'Sm ', None),
            (b'\xf6', 'iso-8859-1'),
            (b' rg ', None),
            (b'\xe5', 'iso-8859-1'),
            (b' sbord', None),
        ])

    def test_rfc2047_B_bad_padding(self):
        template = '=?iso-8859-1?B?%s?='
        # only test complete bytes
        cases = [
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi'),
        ]
        for encoded, plain in cases:
            self.assertEqual(decode_header(template % encoded),
                             [(plain, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004.
        raw = '=?iso-8659-1?Q?andr=e9=zz?='
        expected = [(b'andr\xe9=zz', 'iso-8659-1')]
        self.assertEqual(decode_header(raw), expected)

    def test_rfc2047_rfc2047_1(self):
        # 1st testcase at end of rfc2047
        raw = '(=?ISO-8859-1?Q?a?=)'
        expected = [(b'(', None), (b'a', 'iso-8859-1'), (b')', None)]
        self.assertEqual(decode_header(raw), expected)

    def test_rfc2047_rfc2047_2(self):
        # 2nd testcase at end of rfc2047
        raw = '(=?ISO-8859-1?Q?a?= b)'
        expected = [(b'(', None), (b'a', 'iso-8859-1'), (b' b)', None)]
        self.assertEqual(decode_header(raw), expected)

    def test_rfc2047_rfc2047_3(self):
        # 3rd testcase at end of rfc2047
        raw = '(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)'
        expected = [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)]
        self.assertEqual(decode_header(raw), expected)

    def test_rfc2047_rfc2047_4(self):
        # 4th testcase at end of rfc2047
        # NOTE(review): in this copy the input is identical to the one in
        # test_rfc2047_rfc2047_3; presumably the two differ only in the
        # amount of whitespace between the encoded words -- confirm.
        raw = '(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)'
        expected = [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)]
        self.assertEqual(decode_header(raw), expected)

    def test_rfc2047_rfc2047_5a(self):
        # 5th testcase at end of rfc2047 newline is \r\n
        raw = '(=?ISO-8859-1?Q?a?=\r\n =?ISO-8859-1?Q?b?=)'
        expected = [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)]
        self.assertEqual(decode_header(raw), expected)

    def test_rfc2047_rfc2047_5b(self):
        # 5th testcase at end of rfc2047 newline is \n
        raw = '(=?ISO-8859-1?Q?a?=\n =?ISO-8859-1?Q?b?=)'
        expected = [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)]
        self.assertEqual(decode_header(raw), expected)

    def test_rfc2047_rfc2047_6(self):
        # 6th testcase at end of rfc2047
        raw = '(=?ISO-8859-1?Q?a_b?=)'
        expected = [(b'(', None), (b'a b', 'iso-8859-1'), (b')', None)]
        self.assertEqual(decode_header(raw), expected)

    def test_rfc2047_rfc2047_7(self):
        # 7th testcase at end of rfc2047
        raw = '(=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)'
        expected = [
            (b'(', None),
            (b'a', 'iso-8859-1'),
            (b' b', 'iso-8859-2'),
            (b')', None),
        ]
        self.assertEqual(decode_header(raw), expected)
        # Each check builds its own Header from a fresh decode.
        self.assertEqual(make_header(decode_header(raw)).encode(),
                         raw.lower())
        self.assertEqual(str(make_header(decode_header(raw))), '(a b)')

    def test_multiline_header(self):
        raw = '=?windows-1252?q?=22M=FCller_T=22?=\r\n <T.Mueller@xxx.com>'
        expected = [
            (b'"M\xfcller T"', 'windows-1252'),
            (b'<T.Mueller@xxx.com>', None),
        ]
        self.assertEqual(decode_header(raw), expected)
        # Re-encoding gives the input with its line break removed.
        self.assertEqual(make_header(decode_header(raw)).encode(),
                         ''.join(raw.splitlines()))
        self.assertEqual(str(make_header(decode_header(raw))),
                         '"Müller T" <T.Mueller@xxx.com>')
2212
Ezio Melottib3aedd42010-11-20 19:04:17 +00002213
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002214# Test the MIMEMessage class
class TestMIMEMessage(TestEmailBase):
    # Covers MIMEMessage construction plus parsing and generation of
    # message/rfc822, multipart/report and multipart/digest containers,
    # using the msg_NN.txt sample files.

    def setUp(self):
        with openfile('msg_11.txt') as fp:
            self._text = fp.read()

    def _check_digest_default_types(self, filename):
        # Shared body of the two default-type tests: in the given sample
        # file both top-level subparts must be message/rfc822 and the
        # message wrapped by each must be text/plain, both as default type
        # and as reported content type.
        eq = self.assertEqual
        with openfile(filename) as fp:
            msg = email.message_from_file(fp)
        container1 = msg.get_payload(0)
        eq(container1.get_default_type(), 'message/rfc822')
        eq(container1.get_content_type(), 'message/rfc822')
        container2 = msg.get_payload(1)
        eq(container2.get_default_type(), 'message/rfc822')
        eq(container2.get_content_type(), 'message/rfc822')
        container1a = container1.get_payload(0)
        eq(container1a.get_default_type(), 'text/plain')
        eq(container1a.get_content_type(), 'text/plain')
        container2a = container2.get_payload(0)
        eq(container2a.get_default_type(), 'text/plain')
        eq(container2a.get_content_type(), 'text/plain')

    def test_type_error(self):
        # A plain string is not an acceptable argument.
        self.assertRaises(TypeError, MIMEMessage, 'a plain string')

    def test_valid_argument(self):
        eq = self.assertEqual
        subject = 'A sub-message'
        m = Message()
        m['Subject'] = subject
        r = MIMEMessage(m)
        eq(r.get_content_type(), 'message/rfc822')
        payload = r.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subpart = payload[0]
        # The payload holds the very object that was passed in.
        self.assertIs(subpart, m)
        eq(subpart['subject'], subject)

    def test_bad_multipart(self):
        # Attaching a second message to a MIMEMessage must be refused.
        msg1 = Message()
        msg1['Subject'] = 'subpart 1'
        msg2 = Message()
        msg2['Subject'] = 'subpart 2'
        r = MIMEMessage(msg1)
        self.assertRaises(errors.MultipartConversionError, r.attach, msg2)

    def test_generate(self):
        # First craft the message to be encapsulated
        m = Message()
        m['Subject'] = 'An enclosed message'
        m.set_payload('Here is the body of the message.\n')
        r = MIMEMessage(m)
        r['Subject'] = 'The enclosing message'
        s = StringIO()
        g = Generator(s)
        g.flatten(r)
        self.assertEqual(s.getvalue(), """\
Content-Type: message/rfc822
MIME-Version: 1.0
Subject: The enclosing message

Subject: An enclosed message

Here is the body of the message.
""")

    def test_parse_message_rfc822(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_11.txt')
        eq(msg.get_content_type(), 'message/rfc822')
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        submsg = payload[0]
        self.assertIsInstance(submsg, Message)
        eq(submsg['subject'], 'An enclosed message')
        eq(submsg.get_payload(), 'Here is the body of the message.\n')

    def test_dsn(self):
        eq = self.assertEqual
        # msg 16 is a Delivery Status Notification, see RFC 1894
        msg = self._msgobj('msg_16.txt')
        eq(msg.get_content_type(), 'multipart/report')
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 3)
        # Subpart 1 is a text/plain, human readable section
        subpart = msg.get_payload(0)
        eq(subpart.get_content_type(), 'text/plain')
        # NOTE(review): the indented lines below are copied with the single
        # leading space visible in the reviewed copy; confirm the exact
        # indentation against msg_16.txt.
        eq(subpart.get_payload(), """\
This report relates to a message you sent with the following header fields:

 Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
 Date: Sun, 23 Sep 2001 20:10:55 -0700
 From: "Ian T. Henry" <henryi@oxy.edu>
 To: SoCal Raves <scr@socal-raves.org>
 Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

 Recipient address: jangel1@cougar.noc.ucla.edu
 Reason: recipient reached disk quota

""")
        # Subpart 2 contains the machine parsable DSN information.  It
        # consists of two blocks of headers, represented by two nested Message
        # objects.
        subpart = msg.get_payload(1)
        eq(subpart.get_content_type(), 'message/delivery-status')
        eq(len(subpart.get_payload()), 2)
        # message/delivery-status should treat each block as a bunch of
        # headers, i.e. a bunch of Message objects.
        dsn1 = subpart.get_payload(0)
        self.assertIsInstance(dsn1, Message)
        eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
        eq(dsn1.get_param('dns', header='reporting-mta'), '')
        # Try a missing one <wink>
        eq(dsn1.get_param('nsd', header='reporting-mta'), None)
        dsn2 = subpart.get_payload(1)
        self.assertIsInstance(dsn2, Message)
        eq(dsn2['action'], 'failed')
        eq(dsn2.get_params(header='original-recipient'),
           [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
        eq(dsn2.get_param('rfc822', header='final-recipient'), '')
        # Subpart 3 is the original message
        subpart = msg.get_payload(2)
        eq(subpart.get_content_type(), 'message/rfc822')
        payload = subpart.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subsubpart = payload[0]
        self.assertIsInstance(subsubpart, Message)
        eq(subsubpart.get_content_type(), 'text/plain')
        eq(subsubpart['message-id'],
           '<002001c144a6$8752e060$56104586@oxy.edu>')

    def test_epilogue(self):
        # Build by hand the message stored in msg_21.txt and check that
        # flattening it reproduces the file text.
        eq = self.ndiffAssertEqual
        with openfile('msg_21.txt') as fp:
            text = fp.read()
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = 'End of MIME message\n'
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), text)

    def test_no_nl_preamble(self):
        # The preamble has no trailing newline and the epilogue is empty.
        eq = self.ndiffAssertEqual
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = ''
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
""")

    def test_default_type(self):
        self._check_digest_default_types('msg_30.txt')

    def test_default_type_with_explicit_container_type(self):
        self._check_digest_default_types('msg_28.txt')

    def test_default_type_non_parsed(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # Set up container
        container = MIMEMultipart('digest', 'BOUNDARY')
        container.epilogue = ''
        # Set up subparts
        subpart1a = MIMEText('message 1\n')
        subpart2a = MIMEText('message 2\n')
        subpart1 = MIMEMessage(subpart1a)
        subpart2 = MIMEMessage(subpart2a)
        container.attach(subpart1)
        container.attach(subpart2)
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(unixfrom=False), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
        # With the explicit headers removed, the subparts must still report
        # message/rfc822 and the generated text simply omits those headers.
        del subpart1['content-type']
        del subpart1['mime-version']
        del subpart2['content-type']
        del subpart2['mime-version']
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(unixfrom=False), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')

    def test_mime_attachments_in_constructor(self):
        eq = self.assertEqual
        text1 = MIMEText('')
        text2 = MIMEText('')
        msg = MIMEMultipart(_subparts=(text1, text2))
        eq(len(msg.get_payload()), 2)
        eq(msg.get_payload(0), text1)
        eq(msg.get_payload(1), text2)

    def test_default_multipart_constructor(self):
        msg = MIMEMultipart()
        self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002513
Ezio Melottib3aedd42010-11-20 19:04:17 +00002514
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002515# A general test of parser->model->generator idempotency. IOW, read a message
2516# in, parse it into a message object tree, then without touching the tree,
2517# regenerate the plain text. The original text and the transformed text
# should be identical.  Note that we ignore the Unix-From since that may
2519# contain a changed date.
class TestIdempotent(TestEmailBase):
    # Each test parses a sample message, regenerates it without touching
    # the object tree, and requires the output to equal the input text.

    # Line separator appended to the expected payload and preamble values.
    linesep = '\n'

    def _msgobj(self, filename):
        # Return (parsed message, raw text) for the named sample file.
        with openfile(filename) as fp:
            data = fp.read()
        msg = email.message_from_string(data)
        return msg, data

    def _idempotent(self, msg, text, unixfrom=False):
        # Flatten msg with header wrapping disabled (maxheaderlen=0) and
        # compare the result with the original text.
        eq = self.ndiffAssertEqual
        s = StringIO()
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg, unixfrom=unixfrom)
        eq(text, s.getvalue())

    def _assert_roundtrip(self, filename, unixfrom=False):
        # Parse the sample file and check that it regenerates unchanged.
        msg, text = self._msgobj(filename)
        self._idempotent(msg, text, unixfrom=unixfrom)

    def test_parse_text_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_01.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_params()[1], ('charset', 'us-ascii'))
        eq(msg.get_param('charset'), 'us-ascii')
        self.assertIsNone(msg.preamble)
        self.assertIsNone(msg.epilogue)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_03.txt')
        eq(msg.get_content_type(), 'text/plain')
        self.assertIsNone(msg.get_params())
        self.assertIsNone(msg.get_param('charset'))
        self._idempotent(msg, text)

    def test_simple_multipart(self):
        self._assert_roundtrip('msg_04.txt')

    def test_MIME_digest(self):
        self._assert_roundtrip('msg_02.txt')

    def test_long_header(self):
        self._assert_roundtrip('msg_27.txt')

    def test_MIME_digest_with_part_headers(self):
        self._assert_roundtrip('msg_28.txt')

    def test_mixed_with_image(self):
        self._assert_roundtrip('msg_06.txt')

    def test_multipart_report(self):
        self._assert_roundtrip('msg_05.txt')

    def test_dsn(self):
        self._assert_roundtrip('msg_16.txt')

    def test_preamble_epilogue(self):
        self._assert_roundtrip('msg_21.txt')

    def test_multipart_one_part(self):
        self._assert_roundtrip('msg_23.txt')

    def test_multipart_no_parts(self):
        self._assert_roundtrip('msg_24.txt')

    def test_no_start_boundary(self):
        self._assert_roundtrip('msg_31.txt')

    def test_rfc2231_charset(self):
        self._assert_roundtrip('msg_32.txt')

    def test_more_rfc2231_parameters(self):
        self._assert_roundtrip('msg_33.txt')

    def test_text_plain_in_a_multipart_digest(self):
        self._assert_roundtrip('msg_34.txt')

    def test_nested_multipart_mixeds(self):
        self._assert_roundtrip('msg_12a.txt')

    def test_message_external_body_idempotent(self):
        self._assert_roundtrip('msg_36.txt')

    def test_message_delivery_status(self):
        # The only round-trip test that also emits the Unix-From line.
        self._assert_roundtrip('msg_43.txt', unixfrom=True)

    def test_message_signed_idempotent(self):
        self._assert_roundtrip('msg_45.txt')

    def test_content_type(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_05.txt')
        eq(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(msg.get_params())
        eq(params['report-type'], 'delivery-status')
        eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep)
        eq(msg.epilogue, self.linesep)
        eq(len(msg.get_payload()), 3)
        # Make sure the subparts are what we expect
        msg1 = msg.get_payload(0)
        eq(msg1.get_content_type(), 'text/plain')
        eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg2 = msg.get_payload(1)
        eq(msg2.get_content_type(), 'text/plain')
        eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg3 = msg.get_payload(2)
        eq(msg3.get_content_type(), 'message/rfc822')
        self.assertIsInstance(msg3, Message)
        payload = msg3.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg4 = payload[0]
        self.assertIsInstance(msg4, Message)
        eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep)

    def test_parser(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        eq(msg.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and that
        # that submessage has a type of text/plain
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg1 = payload[0]
        self.assertIsInstance(msg1, Message)
        eq(msg1.get_content_type(), 'text/plain')
        self.assertIsInstance(msg1.get_payload(), str)
        eq(msg1.get_payload(), self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002677
2678
Ezio Melottib3aedd42010-11-20 19:04:17 +00002679
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002680# Test various other bits of the package's functionality
2681class TestMiscellaneous(TestEmailBase):
2682 def test_message_from_string(self):
2683 with openfile('msg_01.txt') as fp:
2684 text = fp.read()
2685 msg = email.message_from_string(text)
2686 s = StringIO()
2687 # Don't wrap/continue long headers since we're trying to test
2688 # idempotency.
2689 g = Generator(s, maxheaderlen=0)
2690 g.flatten(msg)
2691 self.assertEqual(text, s.getvalue())
2692
2693 def test_message_from_file(self):
2694 with openfile('msg_01.txt') as fp:
2695 text = fp.read()
2696 fp.seek(0)
2697 msg = email.message_from_file(fp)
2698 s = StringIO()
2699 # Don't wrap/continue long headers since we're trying to test
2700 # idempotency.
2701 g = Generator(s, maxheaderlen=0)
2702 g.flatten(msg)
2703 self.assertEqual(text, s.getvalue())
2704
2705 def test_message_from_string_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002706 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002707 with openfile('msg_01.txt') as fp:
2708 text = fp.read()
2709
2710 # Create a subclass
2711 class MyMessage(Message):
2712 pass
2713
2714 msg = email.message_from_string(text, MyMessage)
2715 unless(isinstance(msg, MyMessage))
2716 # Try something more complicated
2717 with openfile('msg_02.txt') as fp:
2718 text = fp.read()
2719 msg = email.message_from_string(text, MyMessage)
2720 for subpart in msg.walk():
2721 unless(isinstance(subpart, MyMessage))
2722
2723 def test_message_from_file_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002724 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002725 # Create a subclass
2726 class MyMessage(Message):
2727 pass
2728
2729 with openfile('msg_01.txt') as fp:
2730 msg = email.message_from_file(fp, MyMessage)
2731 unless(isinstance(msg, MyMessage))
2732 # Try something more complicated
2733 with openfile('msg_02.txt') as fp:
2734 msg = email.message_from_file(fp, MyMessage)
2735 for subpart in msg.walk():
2736 unless(isinstance(subpart, MyMessage))
2737
R David Murrayc27e5222012-05-25 15:01:48 -04002738 def test_custom_message_does_not_require_arguments(self):
2739 class MyMessage(Message):
2740 def __init__(self):
2741 super().__init__()
2742 msg = self._str_msg("Subject: test\n\ntest", MyMessage)
2743 self.assertTrue(isinstance(msg, MyMessage))
2744
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002745 def test__all__(self):
2746 module = __import__('email')
R David Murray1b6c7242012-03-16 22:43:05 -04002747 self.assertEqual(sorted(module.__all__), [
2748 'base64mime', 'charset', 'encoders', 'errors', 'feedparser',
2749 'generator', 'header', 'iterators', 'message',
2750 'message_from_binary_file', 'message_from_bytes',
2751 'message_from_file', 'message_from_string', 'mime', 'parser',
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002752 'quoprimime', 'utils',
2753 ])
2754
2755 def test_formatdate(self):
2756 now = time.time()
2757 self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
2758 time.gmtime(now)[:6])
2759
2760 def test_formatdate_localtime(self):
2761 now = time.time()
2762 self.assertEqual(
2763 utils.parsedate(utils.formatdate(now, localtime=True))[:6],
2764 time.localtime(now)[:6])
2765
2766 def test_formatdate_usegmt(self):
2767 now = time.time()
2768 self.assertEqual(
2769 utils.formatdate(now, localtime=False),
2770 time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
2771 self.assertEqual(
2772 utils.formatdate(now, localtime=False, usegmt=True),
2773 time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
2774
Georg Brandl1aca31e2012-09-22 09:03:56 +02002775 # parsedate and parsedate_tz will become deprecated interfaces someday
2776 def test_parsedate_returns_None_for_invalid_strings(self):
2777 self.assertIsNone(utils.parsedate(''))
2778 self.assertIsNone(utils.parsedate_tz(''))
2779 self.assertIsNone(utils.parsedate('0'))
2780 self.assertIsNone(utils.parsedate_tz('0'))
2781 self.assertIsNone(utils.parsedate('A Complete Waste of Time'))
2782 self.assertIsNone(utils.parsedate_tz('A Complete Waste of Time'))
2783 # Not a part of the spec but, but this has historically worked:
2784 self.assertIsNone(utils.parsedate(None))
2785 self.assertIsNone(utils.parsedate_tz(None))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002786
2787 def test_parsedate_compact(self):
2788 # The FWS after the comma is optional
2789 self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
2790 utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
2791
2792 def test_parsedate_no_dayofweek(self):
2793 eq = self.assertEqual
2794 eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
2795 (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
2796
2797 def test_parsedate_compact_no_dayofweek(self):
2798 eq = self.assertEqual
2799 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
2800 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2801
R. David Murray4a62e892010-12-23 20:35:46 +00002802 def test_parsedate_no_space_before_positive_offset(self):
2803 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
2804 (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
2805
2806 def test_parsedate_no_space_before_negative_offset(self):
2807 # Issue 1155362: we already handled '+' for this case.
2808 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
2809 (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))
2810
2811
R David Murrayaccd1c02011-03-13 20:06:23 -04002812 def test_parsedate_accepts_time_with_dots(self):
2813 eq = self.assertEqual
2814 eq(utils.parsedate_tz('5 Feb 2003 13.47.26 -0800'),
2815 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2816 eq(utils.parsedate_tz('5 Feb 2003 13.47 -0800'),
2817 (2003, 2, 5, 13, 47, 0, 0, 1, -1, -28800))
2818
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002819 def test_parsedate_acceptable_to_time_functions(self):
2820 eq = self.assertEqual
2821 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
2822 t = int(time.mktime(timetup))
2823 eq(time.localtime(t)[:6], timetup[:6])
2824 eq(int(time.strftime('%Y', timetup)), 2003)
2825 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
2826 t = int(time.mktime(timetup[:9]))
2827 eq(time.localtime(t)[:6], timetup[:6])
2828 eq(int(time.strftime('%Y', timetup[:9])), 2003)
2829
Alexander Belopolskya07548e2012-06-21 20:34:09 -04002830 def test_mktime_tz(self):
2831 self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
2832 -1, -1, -1, 0)), 0)
2833 self.assertEqual(utils.mktime_tz((1970, 1, 1, 0, 0, 0,
2834 -1, -1, -1, 1234)), -1234)
2835
R. David Murray219d1c82010-08-25 00:45:55 +00002836 def test_parsedate_y2k(self):
2837 """Test for parsing a date with a two-digit year.
2838
2839 Parsing a date with a two-digit year should return the correct
2840 four-digit year. RFC822 allows two-digit years, but RFC2822 (which
2841 obsoletes RFC822) requires four-digit years.
2842
2843 """
2844 self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
2845 utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
2846 self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
2847 utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
2848
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002849 def test_parseaddr_empty(self):
2850 self.assertEqual(utils.parseaddr('<>'), ('', ''))
2851 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
2852
2853 def test_noquote_dump(self):
2854 self.assertEqual(
2855 utils.formataddr(('A Silly Person', 'person@dom.ain')),
2856 'A Silly Person <person@dom.ain>')
2857
2858 def test_escape_dump(self):
2859 self.assertEqual(
2860 utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
R David Murrayb53319f2012-03-14 15:31:47 -04002861 r'"A (Very) Silly Person" <person@dom.ain>')
2862 self.assertEqual(
2863 utils.parseaddr(r'"A \(Very\) Silly Person" <person@dom.ain>'),
2864 ('A (Very) Silly Person', 'person@dom.ain'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002865 a = r'A \(Special\) Person'
2866 b = 'person@dom.ain'
2867 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2868
2869 def test_escape_backslashes(self):
2870 self.assertEqual(
2871 utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
2872 r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
2873 a = r'Arthur \Backslash\ Foobar'
2874 b = 'person@dom.ain'
2875 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2876
R David Murray8debacb2011-04-06 09:35:57 -04002877 def test_quotes_unicode_names(self):
2878 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2879 name = "H\u00e4ns W\u00fcrst"
2880 addr = 'person@dom.ain'
2881 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
2882 latin1_quopri = "=?iso-8859-1?q?H=E4ns_W=FCrst?= <person@dom.ain>"
2883 self.assertEqual(utils.formataddr((name, addr)), utf8_base64)
2884 self.assertEqual(utils.formataddr((name, addr), 'iso-8859-1'),
2885 latin1_quopri)
2886
2887 def test_accepts_any_charset_like_object(self):
2888 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2889 name = "H\u00e4ns W\u00fcrst"
2890 addr = 'person@dom.ain'
2891 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
2892 foobar = "FOOBAR"
2893 class CharsetMock:
2894 def header_encode(self, string):
2895 return foobar
2896 mock = CharsetMock()
2897 mock_expected = "%s <%s>" % (foobar, addr)
2898 self.assertEqual(utils.formataddr((name, addr), mock), mock_expected)
2899 self.assertEqual(utils.formataddr((name, addr), Charset('utf-8')),
2900 utf8_base64)
2901
2902 def test_invalid_charset_like_object_raises_error(self):
2903 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2904 name = "H\u00e4ns W\u00fcrst"
2905 addr = 'person@dom.ain'
2906 # A object without a header_encode method:
2907 bad_charset = object()
2908 self.assertRaises(AttributeError, utils.formataddr, (name, addr),
2909 bad_charset)
2910
2911 def test_unicode_address_raises_error(self):
2912 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2913 addr = 'pers\u00f6n@dom.in'
2914 self.assertRaises(UnicodeError, utils.formataddr, (None, addr))
2915 self.assertRaises(UnicodeError, utils.formataddr, ("Name", addr))
2916
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002917 def test_name_with_dot(self):
2918 x = 'John X. Doe <jxd@example.com>'
2919 y = '"John X. Doe" <jxd@example.com>'
2920 a, b = ('John X. Doe', 'jxd@example.com')
2921 self.assertEqual(utils.parseaddr(x), (a, b))
2922 self.assertEqual(utils.parseaddr(y), (a, b))
2923 # formataddr() quotes the name if there's a dot in it
2924 self.assertEqual(utils.formataddr((a, b)), y)
2925
R. David Murray5397e862010-10-02 15:58:26 +00002926 def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
2927 # issue 10005. Note that in the third test the second pair of
2928 # backslashes is not actually a quoted pair because it is not inside a
2929 # comment or quoted string: the address being parsed has a quoted
2930 # string containing a quoted backslash, followed by 'example' and two
2931 # backslashes, followed by another quoted string containing a space and
2932 # the word 'example'. parseaddr copies those two backslashes
2933 # literally. Per rfc5322 this is not technically correct since a \ may
2934 # not appear in an address outside of a quoted string. It is probably
2935 # a sensible Postel interpretation, though.
2936 eq = self.assertEqual
2937 eq(utils.parseaddr('""example" example"@example.com'),
2938 ('', '""example" example"@example.com'))
2939 eq(utils.parseaddr('"\\"example\\" example"@example.com'),
2940 ('', '"\\"example\\" example"@example.com'))
2941 eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
2942 ('', '"\\\\"example\\\\" example"@example.com'))
2943
R. David Murray63563cd2010-12-18 18:25:38 +00002944 def test_parseaddr_preserves_spaces_in_local_part(self):
2945 # issue 9286. A normal RFC5322 local part should not contain any
2946 # folding white space, but legacy local parts can (they are a sequence
2947 # of atoms, not dotatoms). On the other hand we strip whitespace from
2948 # before the @ and around dots, on the assumption that the whitespace
2949 # around the punctuation is a mistake in what would otherwise be
2950 # an RFC5322 local part. Leading whitespace is, as usual, stripped as well.
# NOTE(review): the first two assertions below read identically in this copy;
# presumably runs of spaces inside the literals were collapsed. Confirm the
# exact whitespace against the upstream file before editing these strings.
2951 self.assertEqual(('', "merwok wok@xample.com"),
2952 utils.parseaddr("merwok wok@xample.com"))
2953 self.assertEqual(('', "merwok wok@xample.com"),
2954 utils.parseaddr("merwok wok@xample.com"))
2955 self.assertEqual(('', "merwok wok@xample.com"),
2956 utils.parseaddr(" merwok wok @xample.com"))
2957 self.assertEqual(('', 'merwok"wok" wok@xample.com'),
2958 utils.parseaddr('merwok"wok" wok@xample.com'))
2959 self.assertEqual(('', 'merwok.wok.wok@xample.com'),
2960 utils.parseaddr('merwok. wok . wok@xample.com'))
2961
R David Murrayb53319f2012-03-14 15:31:47 -04002962 def test_formataddr_does_not_quote_parens_in_quoted_string(self):
2963 addr = ("'foo@example.com' (foo@example.com)",
2964 'foo@example.com')
2965 addrstr = ('"\'foo@example.com\' '
2966 '(foo@example.com)" <foo@example.com>')
2967 self.assertEqual(utils.parseaddr(addrstr), addr)
2968 self.assertEqual(utils.formataddr(addr), addrstr)
2969
2970
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002971 def test_multiline_from_comment(self):
2972 x = """\
2973Foo
2974\tBar <foo@example.com>"""
2975 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
2976
2977 def test_quote_dump(self):
2978 self.assertEqual(
2979 utils.formataddr(('A Silly; Person', 'person@dom.ain')),
2980 r'"A Silly; Person" <person@dom.ain>')
2981
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002982 def test_charset_richcomparisons(self):
2983 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002984 ne = self.assertNotEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002985 cset1 = Charset()
2986 cset2 = Charset()
2987 eq(cset1, 'us-ascii')
2988 eq(cset1, 'US-ASCII')
2989 eq(cset1, 'Us-AsCiI')
2990 eq('us-ascii', cset1)
2991 eq('US-ASCII', cset1)
2992 eq('Us-AsCiI', cset1)
2993 ne(cset1, 'usascii')
2994 ne(cset1, 'USASCII')
2995 ne(cset1, 'UsAsCiI')
2996 ne('usascii', cset1)
2997 ne('USASCII', cset1)
2998 ne('UsAsCiI', cset1)
2999 eq(cset1, cset2)
3000 eq(cset2, cset1)
3001
3002 def test_getaddresses(self):
3003 eq = self.assertEqual
3004 eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
3005 'Bud Person <bperson@dom.ain>']),
3006 [('Al Person', 'aperson@dom.ain'),
3007 ('Bud Person', 'bperson@dom.ain')])
3008
3009 def test_getaddresses_nasty(self):
3010 eq = self.assertEqual
3011 eq(utils.getaddresses(['foo: ;']), [('', '')])
3012 eq(utils.getaddresses(
3013 ['[]*-- =~$']),
3014 [('', ''), ('', ''), ('', '*--')])
3015 eq(utils.getaddresses(
3016 ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
3017 [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
3018
3019 def test_getaddresses_embedded_comment(self):
3020 """Test proper handling of a nested comment"""
3021 eq = self.assertEqual
3022 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
3023 eq(addrs[0][1], 'foo@bar.com')
3024
3025 def test_utils_quote_unquote(self):
3026 eq = self.assertEqual
3027 msg = Message()
3028 msg.add_header('content-disposition', 'attachment',
3029 filename='foo\\wacky"name')
3030 eq(msg.get_filename(), 'foo\\wacky"name')
3031
3032 def test_get_body_encoding_with_bogus_charset(self):
3033 charset = Charset('not a charset')
3034 self.assertEqual(charset.get_body_encoding(), 'base64')
3035
3036 def test_get_body_encoding_with_uppercase_charset(self):
3037 eq = self.assertEqual
3038 msg = Message()
3039 msg['Content-Type'] = 'text/plain; charset=UTF-8'
3040 eq(msg['content-type'], 'text/plain; charset=UTF-8')
3041 charsets = msg.get_charsets()
3042 eq(len(charsets), 1)
3043 eq(charsets[0], 'utf-8')
3044 charset = Charset(charsets[0])
3045 eq(charset.get_body_encoding(), 'base64')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00003046 msg.set_payload(b'hello world', charset=charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003047 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
3048 eq(msg.get_payload(decode=True), b'hello world')
3049 eq(msg['content-transfer-encoding'], 'base64')
3050 # Try another one
3051 msg = Message()
3052 msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
3053 charsets = msg.get_charsets()
3054 eq(len(charsets), 1)
3055 eq(charsets[0], 'us-ascii')
3056 charset = Charset(charsets[0])
3057 eq(charset.get_body_encoding(), encoders.encode_7or8bit)
3058 msg.set_payload('hello world', charset=charset)
3059 eq(msg.get_payload(), 'hello world')
3060 eq(msg['content-transfer-encoding'], '7bit')
3061
3062 def test_charsets_case_insensitive(self):
3063 lc = Charset('us-ascii')
3064 uc = Charset('US-ASCII')
3065 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
3066
3067 def test_partial_falls_inside_message_delivery_status(self):
# Parses msg_43.txt, dumps its MIME structure with iterators._structure()
# into a StringIO, and compares that text with the expected outline below.
3068 eq = self.ndiffAssertEqual
3069 # The Parser interface provides chunks of data to FeedParser in 8192
3070 # byte gulps. SF bug #1076485 found one of those chunks inside
3071 # message/delivery-status header block, which triggered an
3072 # unreadline() of NeedMoreData.
3073 msg = self._msgobj('msg_43.txt')
3074 sfp = StringIO()
3075 iterators._structure(msg, sfp)
# NOTE(review): the nesting indentation inside the expected string appears to
# have been lost in this copy of the file -- confirm against the real source.
3076 eq(sfp.getvalue(), """\
3077multipart/report
3078 text/plain
3079 message/delivery-status
3080 text/plain
3081 text/plain
3082 text/plain
3083 text/plain
3084 text/plain
3085 text/plain
3086 text/plain
3087 text/plain
3088 text/plain
3089 text/plain
3090 text/plain
3091 text/plain
3092 text/plain
3093 text/plain
3094 text/plain
3095 text/plain
3096 text/plain
3097 text/plain
3098 text/plain
3099 text/plain
3100 text/plain
3101 text/plain
3102 text/plain
3103 text/plain
3104 text/plain
3105 text/plain
3106 text/rfc822-headers
3107""")
3108
R. David Murraya0b44b52010-12-02 21:47:19 +00003109 def test_make_msgid_domain(self):
3110 self.assertEqual(
3111 email.utils.make_msgid(domain='testdomain-string')[-19:],
3112 '@testdomain-string>')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003113
def test_Generator_linend(self):
    """Generator output of the msg_26.txt text equals its LF-only form."""
    # Issue 14645.
    with openfile('msg_26.txt', newline='\n') as fp:
        original = fp.read()
    expected = original.replace('\r\n', '\n')
    parsed = email.message_from_string(original)
    sink = StringIO()
    email.generator.Generator(sink).flatten(parsed)
    self.assertEqual(sink.getvalue(), expected)
3124
def test_BytesGenerator_linend(self):
    """BytesGenerator with linesep='\\r\\n' rebuilds the original text."""
    # Issue 14645.
    with openfile('msg_26.txt', newline='\n') as fp:
        original = fp.read()
    # Parse the LF-only variant, then ask the generator for CRLF again.
    lf_only = original.replace('\r\n', '\n')
    parsed = email.message_from_string(lf_only)
    sink = BytesIO()
    email.generator.BytesGenerator(sink).flatten(parsed, linesep='\r\n')
    self.assertEqual(sink.getvalue().decode('ascii'), original)
3135
def test_BytesGenerator_linend_with_non_ascii(self):
    """Like test_BytesGenerator_linend, with a non-ASCII byte in the data."""
    # Issue 14645.
    with openfile('msg_26.txt', 'rb') as fp:
        raw = fp.read()
    # Swap in a byte outside ASCII before normalizing the line endings.
    expected = raw.replace(b'with attachment', b'fo\xf6')
    lf_only = expected.replace(b'\r\n', b'\n')
    parsed = email.message_from_bytes(lf_only)
    sink = BytesIO()
    email.generator.BytesGenerator(sink).flatten(parsed, linesep='\r\n')
    self.assertEqual(sink.getvalue(), expected)
3147
Ezio Melottib3aedd42010-11-20 19:04:17 +00003148
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003149# Test the iterator/generators
class TestIterators(TestEmailBase):
    """Tests for the email.iterators helpers and BufferedSubFile.push()."""

    def test_body_line_iterator(self):
        """Check line count and joined text from body_line_iterator()."""
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # First a simple non-multipart message
        msg = self._msgobj('msg_01.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 6)
        neq(EMPTYSTRING.join(lines), msg.get_payload())
        # Now a more complicated multipart
        msg = self._msgobj('msg_02.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 43)
        with openfile('msg_19.txt') as fp:
            neq(EMPTYSTRING.join(lines), fp.read())

    def test_typed_subpart_iterator(self):
        """typed_subpart_iterator(msg, 'text') finds two parts in msg_04."""
        eq = self.assertEqual
        msg = self._msgobj('msg_04.txt')
        payloads = [part.get_payload()
                    for part in iterators.typed_subpart_iterator(msg, 'text')]
        eq(len(payloads), 2)
        eq(EMPTYSTRING.join(payloads), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        """typed_subpart_iterator(msg, 'text', 'plain') finds one part."""
        eq = self.assertEqual
        msg = self._msgobj('msg_03.txt')
        payloads = [
            part.get_payload()
            for part in iterators.typed_subpart_iterator(msg, 'text', 'plain')
        ]
        eq(len(payloads), 1)
        eq(EMPTYSTRING.join(payloads), """\

Hi,

Do you like this message?

-Me
""")

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
           line endings.  A CR ending one push() followed by a LF starting
           the next push() added an empty line.
        '''
        # Each entry is (chunk pushed, number of complete lines that must
        # become readable right after that push).
        imt = [
            ("a\r \n", 2),
            ("b", 0),
            ("c\n", 1),
            ("", 0),
            ("d\r\n", 1),
            ("e\r", 0),
            ("\nf", 1),
            ("\r\n", 1),
            ]
        from email.feedparser import BufferedSubFile, NeedMoreData
        bsf = BufferedSubFile()
        om = []
        nt = 0
        for il, n in imt:
            bsf.push(il)
            nt += n
            n1 = 0
            while True:
                ol = bsf.readline()
                # NeedMoreData is a sentinel object, so compare by identity.
                if ol is NeedMoreData:
                    break
                om.append(ol)
                n1 += 1
            # assertEqual reports both values when the counts disagree.
            self.assertEqual(n, n1)
        self.assertEqual(len(om), nt)
        self.assertEqual(''.join(il for il, n in imt), ''.join(om))
3236
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003237
Ezio Melottib3aedd42010-11-20 19:04:17 +00003238
class TestParsers(TestEmailBase):
    """Parser tests: header-only parsing, continuations and line endings."""

    # Show full diffs when a long flattened message does not match.
    maxDiff = None

    def test_header_parser(self):
        """HeaderParser leaves a multipart body unparsed, as one string."""
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt') as fp:
            msg = HeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)

    def test_bytes_header_parser(self):
        """BytesHeaderParser: str payload, bytes payload with decode=True."""
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt', 'rb') as fp:
            msg = email.parser.BytesHeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)
        self.assertIsInstance(msg.get_payload(decode=True), bytes)

    def test_whitespace_continuation(self):
        """A whitespace-only line after Subject: continues that header."""
        eq = self.assertEqual
        # This message contains a line after the Subject: header that has only
        # whitespace, but it is not empty!
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: the next line has a space on it
\x20
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        """Same as above, with Subject: as the final header."""
        eq = self.assertEqual
        # Like the previous test, but the subject line is the last
        # header.
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam
Subject: the next line has a space on it
\x20

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        """Parts of the CRLF-delimited msg_26.txt keep CRLF in payloads."""
        eq = self.assertEqual
        with openfile('msg_26.txt', newline='\n') as fp:
            msg = Parser().parse(fp)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'text/plain')
        eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'application/riscos')

    def test_crlf_flatten(self):
        """Generator with linesep='\\r\\n' reproduces msg_26.txt exactly."""
        # Using newline='\n' preserves the crlfs in this input file.
        with openfile('msg_26.txt', newline='\n') as fp:
            text = fp.read()
        msg = email.message_from_string(text)
        s = StringIO()
        g = Generator(s)
        g.flatten(msg, linesep='\r\n')
        self.assertEqual(s.getvalue(), text)

    def test_multipart_digest_with_extra_mime_headers(self):
        """Check the nesting of the multipart/digest in msg_28.txt."""
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'message/rfc822')
        self.assertTrue(part1.is_multipart())
        eq(len(part1.get_payload()), 1)
        part1a = part1.get_payload(0)
        self.assertFalse(part1a.is_multipart())
        eq(part1a.get_content_type(), 'text/plain')
        neq(part1a.get_payload(), 'message 1\n')
        # next message/rfc822
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'message/rfc822')
        self.assertTrue(part2.is_multipart())
        eq(len(part2.get_payload()), 1)
        part2a = part2.get_payload(0)
        self.assertFalse(part2a.is_multipart())
        eq(part2a.get_content_type(), 'text/plain')
        neq(part2a.get_payload(), 'message 2\n')

    def test_three_lines(self):
        """The last header is parsed when the text has no trailing newline."""
        # A bug report by Andrew McNamara
        lines = ['From: Andrew Person <aperson@dom.ain',
                 'Subject: Test',
                 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
        msg = email.message_from_string(NL.join(lines))
        self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        """CRLF line endings are not left on parsed header values."""
        eq = self.assertEqual
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        value1 = 'text'
        value2 = 'more text'
        m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            value1, value2)
        msg = email.message_from_string(m)
        eq(msg.get('Header'), value1)
        eq(msg.get('Next-Header'), value2)

    def test_rfc2822_header_syntax(self):
        """Header names made of unusual printable characters are accepted."""
        eq = self.assertEqual
        m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg), 3)
        eq(sorted(field for field in msg), ['!"#QUX;~', '>From', 'From'])
        eq(msg.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        """A first line with a space before any colon yields no headers."""
        eq = self.assertEqual
        m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg.keys()), 0)

    def test_rfc2822_one_character_header(self):
        """One-character header names are parsed like any other."""
        eq = self.assertEqual
        m = 'A: first header\nB: second header\nCC: third header\n\nbody'
        msg = email.message_from_string(m)
        # sorted() works on any iterable, unlike an in-place list.sort().
        headers = sorted(msg.keys())
        eq(headers, ['A', 'B', 'CC'])
        eq(msg.get_payload(), 'body')

    def test_CRLFLF_at_end_of_part(self):
        """A part body ending in CRLF then LF keeps its CRLF."""
        # issue 5610: feedparser should not eat two chars from body part ending
        # with "\r\n\n".
        m = (
            "From: foo@bar.com\n"
            "To: baz\n"
            "Mime-Version: 1.0\n"
            "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
            "\n"
            "--BOUNDARY\n"
            "Content-Type: text/plain\n"
            "\n"
            "body ending with CRLF newline\r\n"
            "\n"
            "--BOUNDARY--\n"
          )
        msg = email.message_from_string(m)
        self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003414
Ezio Melottib3aedd42010-11-20 19:04:17 +00003415
R. David Murray96fd54e2010-10-08 15:55:28 +00003416class Test8BitBytesHandling(unittest.TestCase):
3417 # In Python3 all input is string, but that doesn't work if the actual input
3418 # uses an 8bit transfer encoding. To hack around that, in email 5.1 we
3419 # decode byte streams using the surrogateescape error handler, and
3420 # reconvert to binary at appropriate places if we detect surrogates. This
3421 # doesn't allow us to transform headers with 8bit bytes (they get munged),
3422 # but it does allow us to parse and preserve them, and to decode body
3423 # parts that use an 8bit CTE.
3424
# Message template shared by the body tests below; the {charset}, {cte} and
# {bodyline} placeholders are filled in with str.format() by each test.
3425 bodytest_msg = textwrap.dedent("""\
3426 From: foo@bar.com
3427 To: baz
3428 Mime-Version: 1.0
3429 Content-Type: text/plain; charset={charset}
3430 Content-Transfer-Encoding: {cte}
3431
3432 {bodyline}
3433 """)
3434
3435 def test_known_8bit_CTE(self):
3436 m = self.bodytest_msg.format(charset='utf-8',
3437 cte='8bit',
3438 bodyline='pöstal').encode('utf-8')
3439 msg = email.message_from_bytes(m)
3440 self.assertEqual(msg.get_payload(), "pöstal\n")
3441 self.assertEqual(msg.get_payload(decode=True),
3442 "pöstal\n".encode('utf-8'))
3443
3444 def test_unknown_8bit_CTE(self):
3445 m = self.bodytest_msg.format(charset='notavalidcharset',
3446 cte='8bit',
3447 bodyline='pöstal').encode('utf-8')
3448 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00003449 self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n")
R. David Murray96fd54e2010-10-08 15:55:28 +00003450 self.assertEqual(msg.get_payload(decode=True),
3451 "pöstal\n".encode('utf-8'))
3452
3453 def test_8bit_in_quopri_body(self):
3454 # This is non-RFC compliant data...without 'decode' the library code
3455 # decodes the body using the charset from the headers, and because the
3456 # source byte really is utf-8 this works. This is likely to fail
3457 # against real dirty data (ie: produce mojibake), but the data is
3458 # invalid anyway so it is as good a guess as any. But this means that
3459 # this test just confirms the current behavior; that behavior is not
3460 # necessarily the best possible behavior. With 'decode' it is
3461 # returning the raw bytes, so that test should be of correct behavior,
3462 # or at least produce the same result that email4 did.
3463 m = self.bodytest_msg.format(charset='utf-8',
3464 cte='quoted-printable',
3465 bodyline='p=C3=B6stál').encode('utf-8')
3466 msg = email.message_from_bytes(m)
3467 self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
3468 self.assertEqual(msg.get_payload(decode=True),
3469 'pöstál\n'.encode('utf-8'))
3470
3471 def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
3472 # This is similar to the previous test, but proves that if the 8bit
3473 # byte is undecodeable in the specified charset, it gets replaced
3474 # by the unicode 'unknown' character. Again, this may or may not
3475 # be the ideal behavior. Note that if decode=False none of the
3476 # decoders will get involved, so this is the only test we need
3477 # for this behavior.
3478 m = self.bodytest_msg.format(charset='ascii',
3479 cte='quoted-printable',
3480 bodyline='p=C3=B6stál').encode('utf-8')
3481 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00003482 self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
R. David Murray96fd54e2010-10-08 15:55:28 +00003483 self.assertEqual(msg.get_payload(decode=True),
3484 'pöstál\n'.encode('utf-8'))
3485
R David Murray80e0aee2012-05-27 21:23:34 -04003486 # test_defect_handling:test_invalid_chars_in_base64_payload
R. David Murray96fd54e2010-10-08 15:55:28 +00003487 def test_8bit_in_base64_body(self):
R David Murray80e0aee2012-05-27 21:23:34 -04003488 # If we get 8bit bytes in a base64 body, we can just ignore them
3489 # as being outside the base64 alphabet and decode anyway. But
3490 # we register a defect.
R. David Murray96fd54e2010-10-08 15:55:28 +00003491 m = self.bodytest_msg.format(charset='utf-8',
3492 cte='base64',
3493 bodyline='cMO2c3RhbAá=').encode('utf-8')
3494 msg = email.message_from_bytes(m)
3495 self.assertEqual(msg.get_payload(decode=True),
R David Murray80e0aee2012-05-27 21:23:34 -04003496 'pöstal'.encode('utf-8'))
3497 self.assertIsInstance(msg.defects[0],
3498 errors.InvalidBase64CharactersDefect)
R. David Murray96fd54e2010-10-08 15:55:28 +00003499
3500 def test_8bit_in_uuencode_body(self):
3501 # Sticking an 8bit byte in a uuencode block makes it undecodable by
3502 # normal means, so the block is returned undecoded, but as bytes.
3503 m = self.bodytest_msg.format(charset='utf-8',
3504 cte='uuencode',
3505 bodyline='<,.V<W1A; á ').encode('utf-8')
3506 msg = email.message_from_bytes(m)
3507 self.assertEqual(msg.get_payload(decode=True),
3508 '<,.V<W1A; á \n'.encode('utf-8'))
3509
3510
# Each entry pairs a raw header source line with (name, value as expected in
# str(msg) output). headertest_msg joins the raw lines, appends a body line,
# and encodes the result as utf-8.
R. David Murray92532142011-01-07 23:25:30 +00003511 headertest_headers = (
3512 ('From: foo@bar.com', ('From', 'foo@bar.com')),
3513 ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
3514 ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
3515 '\tJean de Baddie',
3516 ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
3517 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
3518 ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
3519 ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
3520 )
3521 headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) +
3522 '\nYes, they are flying.\n').encode('utf-8')
R. David Murray96fd54e2010-10-08 15:55:28 +00003523
3524 def test_get_8bit_header(self):
3525 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003526 self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz')
3527 self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz')
R. David Murray96fd54e2010-10-08 15:55:28 +00003528
3529 def test_print_8bit_headers(self):
3530 msg = email.message_from_bytes(self.headertest_msg)
3531 self.assertEqual(str(msg),
R. David Murray92532142011-01-07 23:25:30 +00003532 textwrap.dedent("""\
3533 From: {}
3534 To: {}
3535 Subject: {}
3536 From: {}
3537
3538 Yes, they are flying.
3539 """).format(*[expected[1] for (_, expected) in
3540 self.headertest_headers]))
R. David Murray96fd54e2010-10-08 15:55:28 +00003541
3542 def test_values_with_8bit_headers(self):
3543 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003544 self.assertListEqual([str(x) for x in msg.values()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003545 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003546 'b\uFFFD\uFFFDz',
3547 'Maintenant je vous pr\uFFFD\uFFFDsente mon '
3548 'coll\uFFFD\uFFFDgue, le pouf '
3549 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
R. David Murray96fd54e2010-10-08 15:55:28 +00003550 '\tJean de Baddie',
R. David Murray92532142011-01-07 23:25:30 +00003551 "g\uFFFD\uFFFDst"])
R. David Murray96fd54e2010-10-08 15:55:28 +00003552
3553 def test_items_with_8bit_headers(self):
3554 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003555 self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003556 [('From', 'foo@bar.com'),
R. David Murray92532142011-01-07 23:25:30 +00003557 ('To', 'b\uFFFD\uFFFDz'),
3558 ('Subject', 'Maintenant je vous '
3559 'pr\uFFFD\uFFFDsente '
3560 'mon coll\uFFFD\uFFFDgue, le pouf '
3561 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
3562 '\tJean de Baddie'),
3563 ('From', 'g\uFFFD\uFFFDst')])
R. David Murray96fd54e2010-10-08 15:55:28 +00003564
3565 def test_get_all_with_8bit_headers(self):
3566 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003567 self.assertListEqual([str(x) for x in msg.get_all('from')],
R. David Murray96fd54e2010-10-08 15:55:28 +00003568 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003569 'g\uFFFD\uFFFDst'])
R. David Murray96fd54e2010-10-08 15:55:28 +00003570
R David Murraya2150232011-03-16 21:11:23 -04003571 def test_get_content_type_with_8bit(self):
3572 msg = email.message_from_bytes(textwrap.dedent("""\
3573 Content-Type: text/pl\xA7in; charset=utf-8
3574 """).encode('latin-1'))
3575 self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin")
3576 self.assertEqual(msg.get_content_maintype(), "text")
3577 self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin")
3578
R David Murray97f43c02012-06-24 05:03:27 -04003579 # test_headerregistry.TestContentTypeHeader.non_ascii_in_params
R David Murraya2150232011-03-16 21:11:23 -04003580 def test_get_params_with_8bit(self):
3581 msg = email.message_from_bytes(
3582 'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1'))
3583 self.assertEqual(msg.get_params(header='x-header'),
3584 [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])
3585 self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne')
3586 # XXX: someday you might be able to get 'b\xa7r', for now you can't.
3587 self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None)
3588
R David Murray97f43c02012-06-24 05:03:27 -04003589 # test_headerregistry.TestContentTypeHeader.non_ascii_in_rfc2231_value
R David Murraya2150232011-03-16 21:11:23 -04003590 def test_get_rfc2231_params_with_8bit(self):
# NOTE(review): the title*= line is presumably a folded continuation of the
# Content-Type header; its leading whitespace is not visible in this copy.
3591 msg = email.message_from_bytes(textwrap.dedent("""\
3592 Content-Type: text/plain; charset=us-ascii;
3593 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3594 ).encode('latin-1'))
3595 self.assertEqual(msg.get_param('title'),
3596 ('us-ascii', 'en', 'This is not f\uFFFDn'))
3597
3598 def test_set_rfc2231_params_with_8bit(self):
3599 msg = email.message_from_bytes(textwrap.dedent("""\
3600 Content-Type: text/plain; charset=us-ascii;
3601 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3602 ).encode('latin-1'))
3603 msg.set_param('title', 'test')
3604 self.assertEqual(msg.get_param('title'), 'test')
3605
3606 def test_del_rfc2231_params_with_8bit(self):
3607 msg = email.message_from_bytes(textwrap.dedent("""\
3608 Content-Type: text/plain; charset=us-ascii;
3609 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3610 ).encode('latin-1'))
3611 msg.del_param('title')
3612 self.assertEqual(msg.get_param('title'), None)
3613 self.assertEqual(msg.get_content_maintype(), 'text')
3614
3615 def test_get_payload_with_8bit_cte_header(self):
3616 msg = email.message_from_bytes(textwrap.dedent("""\
3617 Content-Transfer-Encoding: b\xa7se64
3618 Content-Type: text/plain; charset=latin-1
3619
3620 payload
3621 """).encode('latin-1'))
3622 self.assertEqual(msg.get_payload(), 'payload\n')
3623 self.assertEqual(msg.get_payload(decode=True), b'payload\n')
3624
# utf-8 encoded message with non-ASCII text in both headers and body; the
# generator tests below parse it with message_from_bytes().
R. David Murray96fd54e2010-10-08 15:55:28 +00003625 non_latin_bin_msg = textwrap.dedent("""\
3626 From: foo@bar.com
3627 To: báz
3628 Subject: Maintenant je vous présente mon collègue, le pouf célèbre
3629 \tJean de Baddie
3630 Mime-Version: 1.0
3631 Content-Type: text/plain; charset="utf-8"
3632 Content-Transfer-Encoding: 8bit
3633
3634 Да, они летят.
3635 """).encode('utf-8')
3636
3637 def test_bytes_generator(self):
3638 msg = email.message_from_bytes(self.non_latin_bin_msg)
3639 out = BytesIO()
3640 email.generator.BytesGenerator(out).flatten(msg)
3641 self.assertEqual(out.getvalue(), self.non_latin_bin_msg)
3642
R. David Murray7372a072011-01-26 21:21:32 +00003643 def test_bytes_generator_handles_None_body(self):
3644 #Issue 11019
3645 msg = email.message.Message()
3646 out = BytesIO()
3647 email.generator.BytesGenerator(out).flatten(msg)
3648 self.assertEqual(out.getvalue(), b"\n")
3649
# Text that Generator is expected to emit for non_latin_bin_msg (see
# test_generator_handles_8bit): encoded-word headers and a base64 body.
R. David Murray92532142011-01-07 23:25:30 +00003650 non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\
R. David Murray96fd54e2010-10-08 15:55:28 +00003651 From: foo@bar.com
R. David Murray92532142011-01-07 23:25:30 +00003652 To: =?unknown-8bit?q?b=C3=A1z?=
3653 Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=
3654 =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=
3655 =?unknown-8bit?q?_Jean_de_Baddie?=
R. David Murray96fd54e2010-10-08 15:55:28 +00003656 Mime-Version: 1.0
3657 Content-Type: text/plain; charset="utf-8"
3658 Content-Transfer-Encoding: base64
3659
3660 0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
3661 """)
3662
3663 def test_generator_handles_8bit(self):
3664 msg = email.message_from_bytes(self.non_latin_bin_msg)
3665 out = StringIO()
3666 email.generator.Generator(out).flatten(msg)
R. David Murray92532142011-01-07 23:25:30 +00003667 self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped)
R. David Murray96fd54e2010-10-08 15:55:28 +00003668
3669 def test_bytes_generator_with_unix_from(self):
3670 # The unixfrom contains a current date, so we can't check it
3671 # literally. Just make sure the first word is 'From' and the
3672 # rest of the message matches the input.
3673 msg = email.message_from_bytes(self.non_latin_bin_msg)
3674 out = BytesIO()
3675 email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
3676 lines = out.getvalue().split(b'\n')
3677 self.assertEqual(lines[0].split()[0], b'From')
3678 self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)
3679
# Built from the wrapped form: list items 2 and 3 (the Subject line and its
# first continuation) are replaced by one longer Subject line, then the
# lines are joined back into a single string.
R. David Murray92532142011-01-07 23:25:30 +00003680 non_latin_bin_msg_as7bit = non_latin_bin_msg_as7bit_wrapped.split('\n')
3681 non_latin_bin_msg_as7bit[2:4] = [
3682 'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
3683 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
3684 non_latin_bin_msg_as7bit = '\n'.join(non_latin_bin_msg_as7bit)
3685
R. David Murray96fd54e2010-10-08 15:55:28 +00003686 def test_message_from_binary_file(self):
3687 fn = 'test.msg'
3688 self.addCleanup(unlink, fn)
3689 with open(fn, 'wb') as testfile:
3690 testfile.write(self.non_latin_bin_msg)
Brett Cannon384917a2010-10-29 23:08:36 +00003691 with open(fn, 'rb') as testfile:
3692 m = email.parser.BytesParser().parse(testfile)
R. David Murray96fd54e2010-10-08 15:55:28 +00003693 self.assertEqual(str(m), self.non_latin_bin_msg_as7bit)
3694
3695 latin_bin_msg = textwrap.dedent("""\
3696 From: foo@bar.com
3697 To: Dinsdale
3698 Subject: Nudge nudge, wink, wink
3699 Mime-Version: 1.0
3700 Content-Type: text/plain; charset="latin-1"
3701 Content-Transfer-Encoding: 8bit
3702
3703 oh là là, know what I mean, know what I mean?
3704 """).encode('latin-1')
3705
3706 latin_bin_msg_as7bit = textwrap.dedent("""\
3707 From: foo@bar.com
3708 To: Dinsdale
3709 Subject: Nudge nudge, wink, wink
3710 Mime-Version: 1.0
3711 Content-Type: text/plain; charset="iso-8859-1"
3712 Content-Transfer-Encoding: quoted-printable
3713
3714 oh l=E0 l=E0, know what I mean, know what I mean?
3715 """)
3716
3717 def test_string_generator_reencodes_to_quopri_when_appropriate(self):
3718 m = email.message_from_bytes(self.latin_bin_msg)
3719 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3720
3721 def test_decoded_generator_emits_unicode_body(self):
3722 m = email.message_from_bytes(self.latin_bin_msg)
3723 out = StringIO()
3724 email.generator.DecodedGenerator(out).flatten(m)
3725 #DecodedHeader output contains an extra blank line compared
3726 #to the input message. RDM: not sure if this is a bug or not,
3727 #but it is not specific to the 8bit->7bit conversion.
3728 self.assertEqual(out.getvalue(),
3729 self.latin_bin_msg.decode('latin-1')+'\n')
3730
3731 def test_bytes_feedparser(self):
# Feeds the message to BytesFeedParser in 10-byte slices.
3732 bfp = email.feedparser.BytesFeedParser()
3733 for i in range(0, len(self.latin_bin_msg), 10):
3734 bfp.feed(self.latin_bin_msg[i:i+10])
3735 m = bfp.close()
3736 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3737
R. David Murray8451c4b2010-10-23 22:19:56 +00003738 def test_crlf_flatten(self):
# msg_26.txt is read in binary mode and must come back unchanged from
# BytesGenerator when linesep='\r\n'.
3739 with openfile('msg_26.txt', 'rb') as fp:
3740 text = fp.read()
3741 msg = email.message_from_bytes(text)
3742 s = BytesIO()
3743 g = email.generator.BytesGenerator(s)
3744 g.flatten(msg, linesep='\r\n')
3745 self.assertEqual(s.getvalue(), text)
R David Murrayc5c14722011-04-06 08:13:02 -04003746
3747 def test_8bit_multipart(self):
3748 # Issue 11605
3749 source = textwrap.dedent("""\
3750 Date: Fri, 18 Mar 2011 17:15:43 +0100
3751 To: foo@example.com
3752 From: foodwatch-Newsletter <bar@example.com>
3753 Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System
3754 Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain>
3755 MIME-Version: 1.0
3756 Content-Type: multipart/alternative;
3757 boundary="b1_76a486bee62b0d200f33dc2ca08220ad"
3758
3759 --b1_76a486bee62b0d200f33dc2ca08220ad
3760 Content-Type: text/plain; charset="utf-8"
3761 Content-Transfer-Encoding: 8bit
3762
3763 Guten Tag, ,
3764
3765 mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die
3766 Nachrichten aus Japan.
3767
3768
3769 --b1_76a486bee62b0d200f33dc2ca08220ad
3770 Content-Type: text/html; charset="utf-8"
3771 Content-Transfer-Encoding: 8bit
3772
3773 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
3774 "http://www.w3.org/TR/html4/loose.dtd">
3775 <html lang="de">
3776 <head>
3777 <title>foodwatch - Newsletter</title>
3778 </head>
3779 <body>
3780 <p>mit gro&szlig;er Betroffenheit verfolgen auch wir im foodwatch-Team
3781 die Nachrichten aus Japan.</p>
3782 </body>
3783 </html>
3784 --b1_76a486bee62b0d200f33dc2ca08220ad--
3785
3786 """).encode('utf-8')
3787 msg = email.message_from_bytes(source)
3788 s = BytesIO()
3789 g = email.generator.BytesGenerator(s)
3790 g.flatten(msg)
3791 self.assertEqual(s.getvalue(), source)
3792
R David Murray9fd170e2012-03-14 14:05:03 -04003793 def test_bytes_generator_b_encoding_linesep(self):
3794 # Issue 14062: b encoding was tacking on an extra \n.
3795 m = Message()
3796 # This has enough non-ascii that it should always end up b encoded.
3797 m['Subject'] = Header('žluťoučký kůň')
3798 s = BytesIO()
3799 g = email.generator.BytesGenerator(s)
3800 g.flatten(m, linesep='\r\n')
3801 self.assertEqual(
3802 s.getvalue(),
3803 b'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
3804
3805 def test_generator_b_encoding_linesep(self):
3806 # Since this broke in ByteGenerator, test Generator for completeness.
3807 m = Message()
3808 # This has enough non-ascii that it should always end up b encoded.
3809 m['Subject'] = Header('žluťoučký kůň')
3810 s = StringIO()
3811 g = email.generator.Generator(s)
3812 g.flatten(m, linesep='\r\n')
3813 self.assertEqual(
3814 s.getvalue(),
3815 'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
3816
R. David Murray8451c4b2010-10-23 22:19:56 +00003817 maxDiff = None
3818
Ezio Melottib3aedd42010-11-20 19:04:17 +00003819
class BaseTestBytesGeneratorIdempotent:
    # Mixin whose helpers read self.linesep, self.blinesep and
    # self.normalize_linesep_regex; concrete subclasses provide them.

    maxDiff = None

    def _msgobj(self, filename):
        # Read the sample file as bytes, rewrite its line endings to this
        # class's convention, then parse the normalized bytes.
        with openfile(filename, 'rb') as stream:
            raw = stream.read()
        normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(normalized), normalized

    def _idempotent(self, msg, data, unixfrom=False):
        # Flatten msg without header wrapping (maxheaderlen=0) and require
        # the output to equal the bytes it was parsed from.
        out = BytesIO()
        generator = email.generator.BytesGenerator(out, maxheaderlen=0)
        generator.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertEqual(data, out.getvalue())
R. David Murray96fd54e2010-10-08 15:55:28 +00003836
3837
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                     TestIdempotent):
    # Bare-LF variant of the bytes idempotency tests.
    # Line separator handed to BytesGenerator.flatten() by _idempotent().
    linesep = '\n'
    # Replacement bytes used by _msgobj() when normalizing sample data.
    blinesep = b'\n'
    # Matches every CRLF pair so it can be rewritten to blinesep.
    normalize_linesep_regex = re.compile(br'\r\n')
3843
3844
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    # CRLF variant of the bytes idempotency tests.
    # Line separator handed to BytesGenerator.flatten() by _idempotent().
    linesep = '\r\n'
    # Replacement bytes used by _msgobj() when normalizing sample data.
    blinesep = b'\r\n'
    # Matches an LF that is not already preceded by CR, so existing CRLF
    # pairs are left alone when the data is rewritten to blinesep.
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
3850
Ezio Melottib3aedd42010-11-20 19:04:17 +00003851
class TestBase64(unittest.TestCase):
    # Checks for the email.base64mime helpers.

    def test_len(self):
        # header_length() must agree with the length of the real encoding.
        self.assertEqual(
            base64mime.header_length('hello'),
            len(base64mime.body_encode(b'hello', eol='')))
        # Base64 emits four characters per (possibly partial) 3-byte group.
        for size in range(15):
            expected = 4 * ((size + 2) // 3)
            self.assertEqual(base64mime.header_length('x' * size), expected)

    def test_decode(self):
        self.assertEqual(base64mime.decode(''), b'')
        self.assertEqual(base64mime.decode('aGVsbG8='), b'hello')

    def test_encode(self):
        encode = base64mime.body_encode
        payload = b'xxxx ' * 20
        self.assertEqual(encode(b''), b'')
        self.assertEqual(encode(b'hello'), 'aGVsbG8=\n')
        # A newline in the input is encoded like any other byte.
        self.assertEqual(encode(b'hello\n'), 'aGVsbG8K\n')
        # The maxlinelen argument wraps the output.
        self.assertEqual(encode(payload, maxlinelen=40), """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IA==
""")
        # The eol argument selects the line terminator.
        self.assertEqual(encode(payload, maxlinelen=40, eol='\r\n'), """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IA==\r
""")

    def test_header_encode(self):
        check = self.assertEqual
        encode = base64mime.header_encode
        check(encode('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        check(encode('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        check(encode('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # The charset option changes only the charset tag.
        check(encode('hello', charset='iso-8859-2'),
              '=?iso-8859-2?b?aGVsbG8=?=')
        check(encode('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003902
3903
Ezio Melottib3aedd42010-11-20 19:04:17 +00003904
class TestQuopri(unittest.TestCase):
    # Checks for email.quoprimime: the "needs quoting" predicates, the length
    # helpers, and header/body encoding and decoding.

    def setUp(self):
        # Byte values that don't need to be encoded in headers.
        self.hlit = list(chain(
            range(ord('a'), ord('z') + 1),
            range(ord('A'), ord('Z') + 1),
            range(ord('0'), ord('9') + 1),
            b'!*+-/'))
        # Byte values that do need to be encoded in headers.
        self.hnon = [c for c in range(256) if c not in self.hlit]
        assert len(self.hlit) + len(self.hnon) == 256
        # Byte values that don't need to be encoded in bodies.
        self.blit = list(range(ord(' '), ord('~') + 1))
        self.blit.append(ord('\t'))
        self.blit.remove(ord('='))
        # Byte values that do need to be encoded in bodies.
        self.bnon = [c for c in range(256) if c not in self.blit]
        assert len(self.blit) + len(self.bnon) == 256

    def test_quopri_header_check(self):
        for c in self.hlit:
            self.assertFalse(quoprimime.header_check(c),
                        'Should not be header quopri encoded: %s' % chr(c))
        for c in self.hnon:
            self.assertTrue(quoprimime.header_check(c),
                            'Should be header quopri encoded: %s' % chr(c))

    def test_quopri_body_check(self):
        for c in self.blit:
            self.assertFalse(quoprimime.body_check(c),
                        'Should not be body quopri encoded: %s' % chr(c))
        for c in self.bnon:
            self.assertTrue(quoprimime.body_check(c),
                            'Should be body quopri encoded: %s' % chr(c))

    def test_header_quopri_len(self):
        eq = self.assertEqual
        eq(quoprimime.header_length(b'hello'), 5)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
            quoprimime.header_length(b'hello') +
            # =?xxx?q?...?= means 10 extra characters
            10)
        eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
            quoprimime.header_length(b'h@e@l@l@o@') +
            # =?xxx?q?...?= means 10 extra characters
            10)
        for c in self.hlit:
            eq(quoprimime.header_length(bytes([c])), 1,
               'expected length 1 for %r' % chr(c))
        for c in self.hnon:
            # Space is special; it's encoded to _
            if c == ord(' '):
                continue
            eq(quoprimime.header_length(bytes([c])), 3,
               'expected length 3 for %r' % chr(c))
        eq(quoprimime.header_length(b' '), 1)

    def test_body_quopri_len(self):
        eq = self.assertEqual
        for c in self.blit:
            eq(quoprimime.body_length(bytes([c])), 1)
        for c in self.bnon:
            eq(quoprimime.body_length(bytes([c])), 3)

    def test_quote_unquote_idempotent(self):
        for x in range(256):
            c = chr(x)
            self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)

    def _test_header_encode(self, header, expected_encoded_header, charset=None):
        # Only pass charset through when the caller gave one, so the
        # default of header_encode() is exercised otherwise.
        if charset is None:
            encoded_header = quoprimime.header_encode(header)
        else:
            encoded_header = quoprimime.header_encode(header, charset)
        self.assertEqual(encoded_header, expected_encoded_header)

    def test_header_encode_null(self):
        self._test_header_encode(b'', '')

    def test_header_encode_one_word(self):
        self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=')

    def test_header_encode_two_lines(self):
        self._test_header_encode(b'hello\nworld',
                                '=?iso-8859-1?q?hello=0Aworld?=')

    def test_header_encode_non_ascii(self):
        self._test_header_encode(b'hello\xc7there',
                                '=?iso-8859-1?q?hello=C7there?=')

    def test_header_encode_alt_charset(self):
        self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
                charset='iso-8859-2')

    def _test_header_decode(self, encoded_header, expected_decoded_header):
        decoded_header = quoprimime.header_decode(encoded_header)
        self.assertEqual(decoded_header, expected_decoded_header)

    def test_header_decode_null(self):
        self._test_header_decode('', '')

    def test_header_decode_one_word(self):
        self._test_header_decode('hello', 'hello')

    def test_header_decode_two_lines(self):
        self._test_header_decode('hello=0Aworld', 'hello\nworld')

    def test_header_decode_non_ascii(self):
        self._test_header_decode('hello=C7there', 'hello\xc7there')

    def _test_decode(self, encoded, expected_decoded, eol=None):
        # Only pass eol through when the caller gave one, so the default
        # of decode() is exercised otherwise.
        if eol is None:
            decoded = quoprimime.decode(encoded)
        else:
            decoded = quoprimime.decode(encoded, eol=eol)
        self.assertEqual(decoded, expected_decoded)

    def test_decode_null_word(self):
        self._test_decode('', '')

    def test_decode_null_line_null_word(self):
        self._test_decode('\r\n', '\n')

    def test_decode_one_word(self):
        self._test_decode('hello', 'hello')

    def test_decode_one_word_eol(self):
        self._test_decode('hello', 'hello', eol='X')

    def test_decode_one_line(self):
        self._test_decode('hello\r\n', 'hello\n')

    def test_decode_one_line_lf(self):
        self._test_decode('hello\n', 'hello\n')

    def test_decode_one_line_cr(self):
        self._test_decode('hello\r', 'hello\n')

    def test_decode_one_line_nl(self):
        self._test_decode('hello\n', 'helloX', eol='X')

    def test_decode_one_line_crnl(self):
        self._test_decode('hello\r\n', 'helloX', eol='X')

    def test_decode_one_line_one_word(self):
        self._test_decode('hello\r\nworld', 'hello\nworld')

    def test_decode_one_line_one_word_eol(self):
        self._test_decode('hello\r\nworld', 'helloXworld', eol='X')

    def test_decode_two_lines(self):
        self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n')

    def test_decode_two_lines_eol(self):
        self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X')

    def test_decode_one_long_line(self):
        self._test_decode('Spam' * 250, 'Spam' * 250)

    def test_decode_one_space(self):
        self._test_decode(' ', '')

    def test_decode_multiple_spaces(self):
        self._test_decode(' ' * 5, '')

    def test_decode_one_line_trailing_spaces(self):
        self._test_decode('hello \r\n', 'hello\n')

    def test_decode_two_lines_trailing_spaces(self):
        self._test_decode('hello \r\nworld \r\n', 'hello\nworld\n')

    def test_decode_quoted_word(self):
        self._test_decode('=22quoted=20words=22', '"quoted words"')

    def test_decode_uppercase_quoting(self):
        self._test_decode('ab=CD=EF', 'ab\xcd\xef')

    def test_decode_lowercase_quoting(self):
        self._test_decode('ab=cd=ef', 'ab\xcd\xef')

    def test_decode_soft_line_break(self):
        self._test_decode('soft line=\r\nbreak', 'soft linebreak')

    def test_decode_false_quoting(self):
        self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2')

    def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
        # Encode body, compare with the expectation, and (when the line
        # terminator is one splitlines() understands) check every output
        # line against maxlinelen.
        kwargs = {}
        if maxlinelen is None:
            # Use body_encode's default.
            maxlinelen = 76
        else:
            kwargs['maxlinelen'] = maxlinelen
        if eol is None:
            # Use body_encode's default.
            eol = '\n'
        else:
            kwargs['eol'] = eol
        encoded_body = quoprimime.body_encode(body, **kwargs)
        self.assertEqual(encoded_body, expected_encoded_body)
        if eol == '\n' or eol == '\r\n':
            # We know how to split the result back into lines, so maxlinelen
            # can be checked.
            for line in encoded_body.splitlines():
                self.assertLessEqual(len(line), maxlinelen)

    def test_encode_null(self):
        self._test_encode('', '')

    def test_encode_null_lines(self):
        self._test_encode('\n\n', '\n\n')

    def test_encode_one_line(self):
        self._test_encode('hello\n', 'hello\n')

    def test_encode_one_line_crlf(self):
        self._test_encode('hello\r\n', 'hello\n')

    def test_encode_one_line_eol(self):
        self._test_encode('hello\n', 'hello\r\n', eol='\r\n')

    def test_encode_one_space(self):
        self._test_encode(' ', '=20')

    def test_encode_one_line_one_space(self):
        self._test_encode(' \n', '=20\n')

    # XXX: body_encode() expects strings, but uses ord(char) on the
    # characters of those strings to index into a 256-entry list.  For code
    # points above 255, this will fail.  Should there be a check for 8-bit
    # only ord() values in body, or at least a comment about the expected
    # input?

    def test_encode_two_lines_one_space(self):
        self._test_encode(' \n \n', '=20\n=20\n')

    def test_encode_one_word_trailing_spaces(self):
        # Two trailing spaces in: only the last one is quoted, the one
        # before it stays literal.
        self._test_encode('hello  ', 'hello =20')

    def test_encode_one_line_trailing_spaces(self):
        # Same as above, with a line terminator after the spaces.
        self._test_encode('hello  \n', 'hello =20\n')

    def test_encode_one_word_trailing_tab(self):
        self._test_encode('hello \t', 'hello =09')

    def test_encode_one_line_trailing_tab(self):
        self._test_encode('hello \t\n', 'hello =09\n')

    def test_encode_trailing_space_before_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6)

    def test_encode_trailing_space_at_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5)

    def test_encode_trailing_space_beyond_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4)

    def test_encode_whitespace_lines(self):
        self._test_encode(' \n' * 5, '=20\n' * 5)

    def test_encode_quoted_equals(self):
        self._test_encode('a = b', 'a =3D b')

    def test_encode_one_long_string(self):
        self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25)

    def test_encode_one_long_line(self):
        self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')

    def test_encode_one_very_long_line(self):
        self._test_encode('x' * 200 + '\n',
                2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n')

    def test_encode_shortest_maxlinelen(self):
        self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4)

    def test_encode_maxlinelen_too_small(self):
        self.assertRaises(ValueError, self._test_encode, '', '', maxlinelen=3)

    def test_encode(self):
        eq = self.assertEqual
        eq(quoprimime.body_encode(''), '')
        eq(quoprimime.body_encode('hello'), 'hello')
        # CRLF in the input comes out as the default eol.
        eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
        # Test the maxlinelen arg
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
x xxxx xxxx xxxx xxxx=20""")
        # Test the eol argument
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
x xxxx xxxx xxxx xxxx=20""")
        eq(quoprimime.body_encode("""\
one line

two line"""), """\
one line

two line""")
4217
4218
Ezio Melottib3aedd42010-11-20 19:04:17 +00004219
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004220# Test the Charset class
class TestCharset(unittest.TestCase):
    def tearDown(self):
        # test_body_encode registers a 'fake' charset; remove it again if it
        # is present.
        from email import charset as CharsetModule
        CharsetModule.CHARSETS.pop('fake', None)

    def test_codec_encodeable(self):
        # us-ascii passes plain text through unchanged.
        ascii_cs = Charset('us-ascii')
        self.assertEqual(ascii_cs.header_encode('Hello World!'),
                         'Hello World!')
        # 8-bit text cannot be header-encoded as us-ascii ...
        eight_bit = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
        self.assertRaises(UnicodeError, ascii_cs.header_encode, eight_bit)
        # ... but it can as utf-8.
        utf8_cs = Charset('utf-8')
        self.assertEqual(utf8_cs.header_encode(eight_bit),
                         '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')

    def test_body_encode(self):
        check = self.assertEqual
        # A charset with QP body encoding.
        cs = Charset('iso-8859-1')
        check('hello w=F6rld', cs.body_encode('hello w\xf6rld'))
        # A charset with Base64 body encoding.
        cs = Charset('utf-8')
        check('aGVsbG8gd29ybGQ=\n', cs.body_encode(b'hello world'))
        # A charset with no body encoding.
        cs = Charset('us-ascii')
        check('hello world', cs.body_encode('hello world'))
        # The convert argument, where input codec != output codec.
        cs = Charset('euc-jp')
        # With apologies to Tokio Kikuchi ;)
        # XXX FIXME
##         try:
##             eq('\x1b$B5FCO;~IW\x1b(B',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
##             eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
##         except LookupError:
##             # We probably don't have the Japanese codecs installed
##             pass
        # SF bug #625509 has to be faked, since there are no built-in
        # encodings where the header encoding is QP but the body encoding
        # is not.
        from email import charset as CharsetModule
        CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8')
        cs = Charset('fake')
        check('hello world', cs.body_encode('hello world'))

    def test_unicode_charset_name(self):
        ascii_cs = Charset('us-ascii')
        self.assertEqual(str(ascii_cs), 'us-ascii')
        self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
4275
4276
Ezio Melottib3aedd42010-11-20 19:04:17 +00004277
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004278# Test multilingual MIME headers.
4279class TestHeader(TestEmailBase):
4280 def test_simple(self):
4281 eq = self.ndiffAssertEqual
4282 h = Header('Hello World!')
4283 eq(h.encode(), 'Hello World!')
4284 h.append(' Goodbye World!')
4285 eq(h.encode(), 'Hello World! Goodbye World!')
4286
4287 def test_simple_surprise(self):
4288 eq = self.ndiffAssertEqual
4289 h = Header('Hello World!')
4290 eq(h.encode(), 'Hello World!')
4291 h.append('Goodbye World!')
4292 eq(h.encode(), 'Hello World! Goodbye World!')
4293
4294 def test_header_needs_no_decoding(self):
4295 h = 'no decoding needed'
4296 self.assertEqual(decode_header(h), [(h, None)])
4297
4298 def test_long(self):
4299 h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
4300 maxlinelen=76)
4301 for l in h.encode(splitchars=' ').split('\n '):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00004302 self.assertTrue(len(l) <= 76)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004303
    def test_multilingual(self):
        # Build one header from three chunks in three charsets, then check
        # its encoded form, what decode_header() returns for it, its str()
        # value, and that make_header() rebuilds an equal header.
        eq = self.ndiffAssertEqual
        g = Charset("iso-8859-1")
        cz = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        g_head = (b'Die Mieter treten hier ein werden mit einem '
                  b'Foerderband komfortabel den Korridor entlang, '
                  b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
                  b'gegen die rotierenden Klingen bef\xf6rdert. ')
        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        h = Header(g_head, g)
        h.append(cz_head, cz)
        h.append(utf8_head, utf8)
        enc = h.encode(maxlinelen=76)
        # The two iso-8859 chunks appear as q encoded words and the utf-8
        # chunk as b encoded words.
        eq(enc, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
        # Decoding yields one (bytes, charset) pair per original chunk.
        decoded = decode_header(enc)
        eq(len(decoded), 3)
        eq(decoded[0], (g_head, 'iso-8859-1'))
        eq(decoded[1], (cz_head, 'iso-8859-2'))
        eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
        # str() of the header is the concatenated text; the expected value
        # is spelled as utf-8 bytes and decoded for the comparison.
        ustr = str(h)
        eq(ustr,
           (b'Die Mieter treten hier ein werden mit einem Foerderband '
            b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
            b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
            b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
            b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
            b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
            b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
            b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
            b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
            b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
            b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
            b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
            b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
            b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
            b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
            b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
            ).decode('utf-8'))
        # Test make_header()
        newh = make_header(decode_header(enc))
        eq(newh, h)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004366
4367 def test_empty_header_encode(self):
4368 h = Header()
4369 self.assertEqual(h.encode(), '')
Barry Warsaw8b3d6592007-08-30 02:10:49 +00004370
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004371 def test_header_ctor_default_args(self):
4372 eq = self.ndiffAssertEqual
4373 h = Header()
4374 eq(h, '')
4375 h.append('foo', Charset('iso-8859-1'))
Guido van Rossum9604e662007-08-30 03:46:43 +00004376 eq(h, 'foo')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004377
4378 def test_explicit_maxlinelen(self):
4379 eq = self.ndiffAssertEqual
4380 hstr = ('A very long line that must get split to something other '
4381 'than at the 76th character boundary to test the non-default '
4382 'behavior')
4383 h = Header(hstr)
4384 eq(h.encode(), '''\
4385A very long line that must get split to something other than at the 76th
4386 character boundary to test the non-default behavior''')
4387 eq(str(h), hstr)
4388 h = Header(hstr, header_name='Subject')
4389 eq(h.encode(), '''\
4390A very long line that must get split to something other than at the
4391 76th character boundary to test the non-default behavior''')
4392 eq(str(h), hstr)
4393 h = Header(hstr, maxlinelen=1024, header_name='Subject')
4394 eq(h.encode(), hstr)
4395 eq(str(h), hstr)
4396
Guido van Rossum9604e662007-08-30 03:46:43 +00004397 def test_quopri_splittable(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004398 eq = self.ndiffAssertEqual
4399 h = Header(charset='iso-8859-1', maxlinelen=20)
Guido van Rossum9604e662007-08-30 03:46:43 +00004400 x = 'xxxx ' * 20
4401 h.append(x)
4402 s = h.encode()
4403 eq(s, """\
4404=?iso-8859-1?q?xxx?=
4405 =?iso-8859-1?q?x_?=
4406 =?iso-8859-1?q?xx?=
4407 =?iso-8859-1?q?xx?=
4408 =?iso-8859-1?q?_x?=
4409 =?iso-8859-1?q?xx?=
4410 =?iso-8859-1?q?x_?=
4411 =?iso-8859-1?q?xx?=
4412 =?iso-8859-1?q?xx?=
4413 =?iso-8859-1?q?_x?=
4414 =?iso-8859-1?q?xx?=
4415 =?iso-8859-1?q?x_?=
4416 =?iso-8859-1?q?xx?=
4417 =?iso-8859-1?q?xx?=
4418 =?iso-8859-1?q?_x?=
4419 =?iso-8859-1?q?xx?=
4420 =?iso-8859-1?q?x_?=
4421 =?iso-8859-1?q?xx?=
4422 =?iso-8859-1?q?xx?=
4423 =?iso-8859-1?q?_x?=
4424 =?iso-8859-1?q?xx?=
4425 =?iso-8859-1?q?x_?=
4426 =?iso-8859-1?q?xx?=
4427 =?iso-8859-1?q?xx?=
4428 =?iso-8859-1?q?_x?=
4429 =?iso-8859-1?q?xx?=
4430 =?iso-8859-1?q?x_?=
4431 =?iso-8859-1?q?xx?=
4432 =?iso-8859-1?q?xx?=
4433 =?iso-8859-1?q?_x?=
4434 =?iso-8859-1?q?xx?=
4435 =?iso-8859-1?q?x_?=
4436 =?iso-8859-1?q?xx?=
4437 =?iso-8859-1?q?xx?=
4438 =?iso-8859-1?q?_x?=
4439 =?iso-8859-1?q?xx?=
4440 =?iso-8859-1?q?x_?=
4441 =?iso-8859-1?q?xx?=
4442 =?iso-8859-1?q?xx?=
4443 =?iso-8859-1?q?_x?=
4444 =?iso-8859-1?q?xx?=
4445 =?iso-8859-1?q?x_?=
4446 =?iso-8859-1?q?xx?=
4447 =?iso-8859-1?q?xx?=
4448 =?iso-8859-1?q?_x?=
4449 =?iso-8859-1?q?xx?=
4450 =?iso-8859-1?q?x_?=
4451 =?iso-8859-1?q?xx?=
4452 =?iso-8859-1?q?xx?=
4453 =?iso-8859-1?q?_?=""")
4454 eq(x, str(make_header(decode_header(s))))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004455 h = Header(charset='iso-8859-1', maxlinelen=40)
4456 h.append('xxxx ' * 20)
Guido van Rossum9604e662007-08-30 03:46:43 +00004457 s = h.encode()
4458 eq(s, """\
4459=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
4460 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
4461 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
4462 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
4463 =?iso-8859-1?q?_xxxx_xxxx_?=""")
4464 eq(x, str(make_header(decode_header(s))))
4465
4466 def test_base64_splittable(self):
4467 eq = self.ndiffAssertEqual
4468 h = Header(charset='koi8-r', maxlinelen=20)
4469 x = 'xxxx ' * 20
4470 h.append(x)
4471 s = h.encode()
4472 eq(s, """\
4473=?koi8-r?b?eHh4?=
4474 =?koi8-r?b?eCB4?=
4475 =?koi8-r?b?eHh4?=
4476 =?koi8-r?b?IHh4?=
4477 =?koi8-r?b?eHgg?=
4478 =?koi8-r?b?eHh4?=
4479 =?koi8-r?b?eCB4?=
4480 =?koi8-r?b?eHh4?=
4481 =?koi8-r?b?IHh4?=
4482 =?koi8-r?b?eHgg?=
4483 =?koi8-r?b?eHh4?=
4484 =?koi8-r?b?eCB4?=
4485 =?koi8-r?b?eHh4?=
4486 =?koi8-r?b?IHh4?=
4487 =?koi8-r?b?eHgg?=
4488 =?koi8-r?b?eHh4?=
4489 =?koi8-r?b?eCB4?=
4490 =?koi8-r?b?eHh4?=
4491 =?koi8-r?b?IHh4?=
4492 =?koi8-r?b?eHgg?=
4493 =?koi8-r?b?eHh4?=
4494 =?koi8-r?b?eCB4?=
4495 =?koi8-r?b?eHh4?=
4496 =?koi8-r?b?IHh4?=
4497 =?koi8-r?b?eHgg?=
4498 =?koi8-r?b?eHh4?=
4499 =?koi8-r?b?eCB4?=
4500 =?koi8-r?b?eHh4?=
4501 =?koi8-r?b?IHh4?=
4502 =?koi8-r?b?eHgg?=
4503 =?koi8-r?b?eHh4?=
4504 =?koi8-r?b?eCB4?=
4505 =?koi8-r?b?eHh4?=
4506 =?koi8-r?b?IA==?=""")
4507 eq(x, str(make_header(decode_header(s))))
4508 h = Header(charset='koi8-r', maxlinelen=40)
4509 h.append(x)
4510 s = h.encode()
4511 eq(s, """\
4512=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
4513 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
4514 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
4515 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
4516 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
4517 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
4518 eq(x, str(make_header(decode_header(s))))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004519
4520 def test_us_ascii_header(self):
4521 eq = self.assertEqual
4522 s = 'hello'
4523 x = decode_header(s)
4524 eq(x, [('hello', None)])
4525 h = make_header(x)
4526 eq(s, h.encode())
4527
4528 def test_string_charset(self):
4529 eq = self.assertEqual
4530 h = Header()
4531 h.append('hello', 'iso-8859-1')
Guido van Rossum9604e662007-08-30 03:46:43 +00004532 eq(h, 'hello')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004533
##    def test_unicode_error(self):
##        raises = self.assertRaises
##        raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
##        h = Header()
##        raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')
4542
4543 def test_utf8_shortest(self):
4544 eq = self.assertEqual
4545 h = Header('p\xf6stal', 'utf-8')
4546 eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
4547 h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
4548 eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')
4549
4550 def test_bad_8bit_header(self):
4551 raises = self.assertRaises
4552 eq = self.assertEqual
4553 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4554 raises(UnicodeError, Header, x)
4555 h = Header()
4556 raises(UnicodeError, h.append, x)
4557 e = x.decode('utf-8', 'replace')
4558 eq(str(Header(x, errors='replace')), e)
4559 h.append(x, errors='replace')
4560 eq(str(h), e)
4561
R David Murray041015c2011-03-25 15:10:55 -04004562 def test_escaped_8bit_header(self):
4563 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
R David Murray6bdb1762011-06-18 12:30:55 -04004564 e = x.decode('ascii', 'surrogateescape')
4565 h = Header(e, charset=email.charset.UNKNOWN8BIT)
R David Murray041015c2011-03-25 15:10:55 -04004566 self.assertEqual(str(h),
4567 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
4568 self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])
4569
R David Murraye5e366c2011-06-18 12:57:28 -04004570 def test_header_handles_binary_unknown8bit(self):
4571 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4572 h = Header(x, charset=email.charset.UNKNOWN8BIT)
4573 self.assertEqual(str(h),
4574 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
4575 self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])
4576
4577 def test_make_header_handles_binary_unknown8bit(self):
4578 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4579 h = Header(x, charset=email.charset.UNKNOWN8BIT)
4580 h2 = email.header.make_header(email.header.decode_header(h))
4581 self.assertEqual(str(h2),
4582 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
4583 self.assertEqual(email.header.decode_header(h2), [(x, 'unknown-8bit')])
4584
R David Murray041015c2011-03-25 15:10:55 -04004585 def test_modify_returned_list_does_not_change_header(self):
4586 h = Header('test')
4587 chunks = email.header.decode_header(h)
4588 chunks.append(('ascii', 'test2'))
4589 self.assertEqual(str(h), 'test')
4590
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004591 def test_encoded_adjacent_nonencoded(self):
4592 eq = self.assertEqual
4593 h = Header()
4594 h.append('hello', 'iso-8859-1')
4595 h.append('world')
4596 s = h.encode()
4597 eq(s, '=?iso-8859-1?q?hello?= world')
4598 h = make_header(decode_header(s))
4599 eq(h.encode(), s)
4600
R David Murray07ea53c2012-06-02 17:56:49 -04004601 def test_whitespace_keeper(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004602 eq = self.assertEqual
4603 s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
4604 parts = decode_header(s)
R David Murray07ea53c2012-06-02 17:56:49 -04004605 eq(parts, [(b'Subject: ', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b' zz.', None)])
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004606 hdr = make_header(parts)
4607 eq(hdr.encode(),
4608 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')
4609
4610 def test_broken_base64_header(self):
4611 raises = self.assertRaises
R. David Murrayc4e69cc2010-08-03 22:14:10 +00004612 s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004613 raises(errors.HeaderParseError, decode_header, s)
4614
R. David Murray477efb32011-01-05 01:39:32 +00004615 def test_shift_jis_charset(self):
4616 h = Header('文', charset='shift_jis')
4617 self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')
4618
R David Murrayde912762011-03-16 18:26:23 -04004619 def test_flatten_header_with_no_value(self):
4620 # Issue 11401 (regression from email 4.x) Note that the space after
4621 # the header doesn't reflect the input, but this is also the way
4622 # email 4.x behaved. At some point it would be nice to fix that.
4623 msg = email.message_from_string("EmptyHeader:")
4624 self.assertEqual(str(msg), "EmptyHeader: \n\n")
4625
R David Murray01581ee2011-04-18 10:04:34 -04004626 def test_encode_preserves_leading_ws_on_value(self):
4627 msg = Message()
4628 msg['SomeHeader'] = ' value with leading ws'
4629 self.assertEqual(str(msg), "SomeHeader: value with leading ws\n\n")
4630
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004631
Ezio Melottib3aedd42010-11-20 19:04:17 +00004632
# Test RFC 2231 header parameter encoding and decoding
class TestRFC2231(TestEmailBase):
    # Exercises RFC 2231 parameter handling on Message: get_param,
    # set_param, del_param, get_filename, get_boundary and
    # get_content_charset, for plain, extended (name*=) and segmented
    # (name*0, name*1*, ...) parameter forms.
    #
    # The "test_headerregistry..." comments name the corresponding tests in
    # the newer header-registry test module.

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes
    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes
    def test_get_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_29.txt')
        eq(msg.get_param('title'),
           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        eq(msg.get_param('title', unquote=False),
           ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))

    def test_set_param(self):
        eq = self.ndiffAssertEqual
        msg = Message()
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii')
        eq(msg.get_param('title'),
           ('us-ascii', '', 'This is even more ***fun*** isn\'t it!'))
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        eq(msg.get_param('title'),
           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        msg = self._msgobj('msg_01.txt')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        # NOTE(review): the scraped source collapsed runs of spaces; the
        # double space in "Fri,  4 May" on the Received line is restored on
        # the assumption that it matches msg_01.txt -- confirm.
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri,  4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    def test_set_param_requote(self):
        msg = Message()
        msg.set_param('title', 'foo')
        self.assertEqual(msg['content-type'], 'text/plain; title="foo"')
        msg.set_param('title', 'bar', requote=False)
        self.assertEqual(msg['content-type'], 'text/plain; title=bar')
        # tspecial is still quoted.
        msg.set_param('title', "(bar)bell", requote=False)
        self.assertEqual(msg['content-type'], 'text/plain; title="(bar)bell"')

    def test_del_param(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        msg.set_param('foo', 'bar', charset='us-ascii', language='en')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        msg.del_param('foo', header='Content-Type')
        # NOTE(review): the scraped source collapsed runs of spaces; the
        # double space in "Fri,  4 May" on the Received line is restored on
        # the assumption that it matches msg_01.txt -- confirm.
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri,  4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_charset
    # I changed the charset name, though, because the one in the file isn't
    # a legal charset name.  Should add a test for an illegal charset.
    def test_rfc2231_get_content_charset(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_32.txt')
        eq(msg.get_content_charset(), 'us-ascii')

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_double_quotes
    def test_rfc2231_parse_rfc_quoting(self):
        m = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*=''This%20is%20even%20more%20;
            \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        # Flattening must reproduce the input unchanged.
        self.assertEqual(m, msg.as_string())

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_with_double_quotes
    def test_rfc2231_parse_extra_quoting(self):
        m = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*="''This%20is%20even%20more%20";
            \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        # Flattening must reproduce the input unchanged.
        self.assertEqual(m, msg.as_string())

    # test_headerregistry.TestContentTypeHeader.rfc2231_no_language_or_charset
    # but new test uses *0* because otherwise lang/charset is not valid.
    # test_headerregistry.TestContentTypeHeader.rfc2231_segmented_normal_values
    def test_rfc2231_no_language_or_charset(self):
        m = '''\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm

'''
        msg = email.message_from_string(m)
        param = msg.get_param('NAME')
        # Unencoded segments are returned as a plain string, not a
        # (charset, language, value) tuple.
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(
            param,
            'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_no_charset
    def test_rfc2231_no_language_or_charset_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    # Duplicate of previous test?
    def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    # test_headerregistry.TestContentTypeHeader.rfc2231_partly_encoded,
    # but the test below is wrong (the first part should be decoded).
    def test_rfc2231_partly_encoded(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20***fun*** is it not.pdf')

    def test_rfc2231_partly_nonencoded(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0="This%20is%20even%20more%20";
\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        # No segment is marked as encoded, so the %XX sequences stay as is.
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_boundary(self):
        m = '''\
Content-Type: multipart/alternative;
\tboundary*0*="''This%20is%20even%20more%20";
\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tboundary*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_boundary(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_charset(self):
        # This is a nonsensical charset value, but tests the code anyway
        m = '''\
Content-Type: text/plain;
\tcharset*0*="This%20is%20even%20more%20";
\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tcharset*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_content_charset(),
                         'this is even more ***fun*** is it not.pdf')

    # test_headerregistry.TestContentTypeHeader.rfc2231_unknown_charset_treated_as_ascii
    def test_rfc2231_bad_encoding_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_charset(self):
        m = """\
Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D

"""
        msg = email.message_from_string(m)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertIsNone(msg.get_content_charset())

    def test_rfc2231_bad_character_in_charset(self):
        m = """\
Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D

"""
        msg = email.message_from_string(m)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertIsNone(msg.get_content_charset())

    def test_rfc2231_bad_character_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2*="is it not.pdf%E2"

'''
        msg = email.message_from_string(m)
        # The undecodable %E2 octet comes back as U+FFFD.
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf\ufffd')

    def test_rfc2231_unknown_encoding(self):
        m = """\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt

"""
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(), 'myfile.txt')

    def test_rfc2231_single_tick_in_filename_extended(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, None)
        eq(language, None)
        eq(s, "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_inside_double_quotes
    def test_rfc2231_single_tick_in_filename(self):
        m = """\
Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(m)
        param = msg.get_param('name')
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(param, "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_value_with_charset_and_lang
    def test_rfc2231_tick_attack_extended(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quote_in_non_encoded_value
    def test_rfc2231_tick_attack(self):
        m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(m)
        param = msg.get_param('name')
        # Without the extended (*) marker the ticks are ordinary characters.
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(param, "us-ascii'en-us'Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_single_quotes_inside_quotes
    def test_rfc2231_no_extended_values(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo; name=\"Frank's Document\"

"""
        msg = email.message_from_string(m)
        eq(msg.get_param('name'), "Frank's Document")

    # test_headerregistry.TestContentTypeHeader.rfc2231_encoded_then_unencoded_segments
    def test_rfc2231_encoded_then_unencoded_segments(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'My\";
\tname*1=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, 'My Document For You')

    # test_headerregistry.TestContentTypeHeader.rfc2231_unencoded_then_encoded_segments
    # test_headerregistry.TestContentTypeHeader.rfc2231_quoted_unencoded_then_encoded_segments
    def test_rfc2231_unencoded_then_encoded_segments(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'My\";
\tname*1*=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, 'My Document For You')
5001
5002
Ezio Melottib3aedd42010-11-20 19:04:17 +00005003
# Tests to ensure that signed parts of an email are completely preserved, as
# required by RFC 1847 section 2.1.  Note that these are incomplete, because
# the email package does not currently always preserve the body.  See issue
# 1670765.
class TestSigned(TestEmailBase):
    # Each test parses msg_45.txt, regenerates it in a different way, and
    # checks that the first MIME part is identical to the one in the file.

    def _msg_and_obj(self, filename):
        # Return the raw text of the test data file together with the
        # message object parsed from it.
        with openfile(filename) as fp:
            original = fp.read()
        return original, email.message_from_string(original)

    def _first_part(self, text):
        # Return the text between the first boundary line and the next
        # occurrence of that same boundary.  A missing match is reported
        # as a test failure instead of an AttributeError on None.
        import re
        match = re.search(r'^--([^\n]+)\n(.*?)\n--\1$', text, re.S | re.M)
        self.assertIsNotNone(match, 'no boundary-delimited MIME part found')
        return match.group(2)

    def _signed_parts_eq(self, original, result):
        # Extract the first mime part of each message and compare them.
        inpart = self._first_part(original)
        outpart = self._first_part(result)
        self.assertEqual(outpart, inpart)

    def test_long_headers_as_string(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)
5039
5040
Ezio Melottib3aedd42010-11-20 19:04:17 +00005041
# Run every test in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()