blob: b7ad667d109921c2d17e58b8f118a011f7904f7f [file] [log] [blame]
Benjamin Peterson46a99002010-01-09 18:45:30 +00001# Copyright (C) 2001-2010 Python Software Foundation
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002# Contact: email-sig@python.org
3# email package unit tests
4
5import os
R. David Murray719a4492010-11-21 16:53:48 +00006import re
Guido van Rossum8b3febe2007-08-30 01:15:14 +00007import sys
8import time
9import base64
10import difflib
11import unittest
12import warnings
R. David Murray96fd54e2010-10-08 15:55:28 +000013import textwrap
Guido van Rossum8b3febe2007-08-30 01:15:14 +000014
R. David Murray96fd54e2010-10-08 15:55:28 +000015from io import StringIO, BytesIO
Guido van Rossum8b3febe2007-08-30 01:15:14 +000016from itertools import chain
17
18import email
R David Murrayc27e5222012-05-25 15:01:48 -040019import email.policy
Guido van Rossum8b3febe2007-08-30 01:15:14 +000020
21from email.charset import Charset
22from email.header import Header, decode_header, make_header
23from email.parser import Parser, HeaderParser
24from email.generator import Generator, DecodedGenerator
25from email.message import Message
26from email.mime.application import MIMEApplication
27from email.mime.audio import MIMEAudio
28from email.mime.text import MIMEText
29from email.mime.image import MIMEImage
30from email.mime.base import MIMEBase
31from email.mime.message import MIMEMessage
32from email.mime.multipart import MIMEMultipart
33from email import utils
34from email import errors
35from email import encoders
36from email import iterators
37from email import base64mime
38from email import quoprimime
39
R David Murray28346b82011-03-31 11:40:20 -040040from test.support import run_unittest, unlink
R David Murraya256bac2011-03-31 12:20:23 -040041from test.test_email import openfile, TestEmailBase
Guido van Rossum8b3febe2007-08-30 01:15:14 +000042
# String constants shared by the tests in this module.
NL, EMPTYSTRING, SPACE = '\n', '', ' '
46
47
# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    def test_get_all(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        # A missing header yields the supplied failobj.
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        eq(msg.get_charset(), None)
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        eq(msg.get_charset(), None)
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_get_charsets(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_make_boundary(self):
        msg = MIMEMultipart('form-data')
        # Note that when the boundary gets created is an implementation
        # detail and might change.
        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
        # Trigger creation of boundary
        msg.as_string()
        self.assertEqual(msg.items()[0][1][:33],
                         'multipart/form-data; boundary="==')
        # XXX: there ought to be tests of the uniqueness of the boundary, too.

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        with openfile('msg_46.txt') as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, True, 0)
        gen.flatten(msg, False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_byte_message_rfc822_only(self):
        # Make sure new bytes header parser also passes this.
        with openfile('msg_46.txt', 'rb') as fp:
            msgdata = fp.read()
        parser = email.parser.BytesHeaderParser()
        msg = parser.parsebytes(msgdata)
        out = BytesIO()
        gen = email.generator.BytesGenerator(out)
        gen.flatten(msg)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        eq(msg.get_payload(decode=True), None)
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            # Item assignment appends a header instead of replacing it, and
            # lookups return the first match.  Drop the previous value so
            # that every spelling is really the one get_payload() consults.
            del msg['content-transfer-encoding']
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_get_payload_n_raises_on_non_multipart(self):
        msg = Message()
        self.assertRaises(TypeError, msg.get_payload, 1)

    def test_decoded_generator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        self.assertIn('from', msg)
        self.assertIn('From', msg)
        self.assertIn('FROM', msg)
        self.assertIn('to', msg)
        self.assertIn('To', msg)
        self.assertIn('TO', msg)

    def test_as_string(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        eq(text, str(msg))
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        # With unixfrom=True the envelope "From " line comes first.
        self.assertTrue(lines[0].startswith('From '))
        eq(text, NL.join(lines[1:]))

    def test_bad_param(self):
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_filename(), None)

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_boundary(), None)

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        eq(msg.get_params(), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    def test_get_param_liberal(self):
        msg = Message()
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        eq(msg.get_param('quuz', header='x-header'), None)
        eq(msg.get_param('quuz'), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    def test_get_param_with_semis_in_quotes(self):
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    def test_get_param_with_quotes(self):
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        msg = email.message_from_string('Header: exists')
        # Header membership is case-insensitive but must match the full name.
        self.assertIn('header', msg)
        self.assertIn('Header', msg)
        self.assertIn('HEADER', msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        # Re-adding the parameter puts it at the end of the list.
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_del_param_on_nonexistent_header(self):
        msg = Message()
        # Nothing to assert: the call simply must not raise when the
        # header is absent.
        msg.del_param('filename', 'content-disposition')

    def test_del_nonexistent_param(self):
        msg = Message()
        msg.add_header('Content-Type', 'text/plain', charset='utf-8')
        existing_header = msg['Content-Type']
        msg.del_param('foobar', header='Content-Type')
        # Deleting a parameter that is not there must leave the header
        # exactly as it was.
        self.assertEqual(msg['Content-Type'], existing_header)
        self.assertEqual(msg['Content-Type'], 'text/plain; charset="utf-8"')

    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        # A type without a subtype is rejected.
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # Changing the type keeps the existing parameters.
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        # With duplicate headers only the first occurrence is replaced.
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    # test_defect_handling:test_invalid_chars_in_base64_payload
    def test_broken_base64_payload(self):
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         (b'\x03\x00\xe9\xd0\xfe\xff\xff.\x8b\xc0'
                          b'\xa1\x00p\xf6\xbf\xe9\x0f'))
        self.assertIsInstance(msg.defects[0],
                              errors.InvalidBase64CharactersDefect)

    def test_broken_unicode_payload(self):
        # This test improves coverage but is not a compliance test.
        # The behavior in this situation is currently undefined by the API.
        x = 'this is a br\xf6ken thing to do'
        msg = Message()
        msg['content-type'] = 'text/plain'
        msg['content-transfer-encoding'] = '8bit'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    def test_questionable_bytes_payload(self):
        # This test improves coverage but is not a compliance test,
        # since it involves poking inside the black box.
        x = 'this is a quéstionable thing to do'.encode('utf-8')
        msg = Message()
        msg['content-type'] = 'text/plain; charset="utf-8"'
        msg['content-transfer-encoding'] = '8bit'
        msg._payload = x
        self.assertEqual(msg.get_payload(decode=True), x)

    # Issue 1078919
    def test_ascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    def test_noascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer.ppt")
        self.assertEqual(
            'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
            msg['Content-Disposition'])

    def test_nonascii_add_header_via_triple(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fußballer.ppt'))
        self.assertEqual(
            'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
            msg['Content-Disposition'])

    def test_ascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="windows [filename].ppt")
        self.assertEqual(
            'attachment; filename="windows [filename].ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer [filename].ppt")
        self.assertEqual(
            "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
            msg['Content-Disposition'])

    def test_add_header_with_name_only_param(self):
        msg = Message()
        # A None value yields a bare parameter name; underscores in the
        # keyword become dashes.
        msg.add_header('Content-Disposition', 'inline', foo_bar=None)
        self.assertEqual("inline; foo-bar", msg['Content-Disposition'])

    def test_add_header_with_no_value(self):
        msg = Message()
        msg.add_header('X-Status', None)
        self.assertEqual('', msg['X-Status'])

    # Issue 5871: reject an attempt to embed a header inside a header value
    # (header injection attack).
    def test_embeded_header_via_Header_rejected(self):
        msg = Message()
        msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_embeded_header_via_string_rejected(self):
        msg = Message()
        msg['Dummy'] = 'dummy\nX-Injected-Header: test'
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_unicode_header_defaults_to_utf8_encoding(self):
        # Issue 14291
        m = MIMEText('abc\n')
        m['Subject'] = 'É test'
        self.assertEqual(str(m), textwrap.dedent("""\
            Content-Type: text/plain; charset="us-ascii"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 7bit
            Subject: =?utf-8?q?=C3=89_test?=

            abc
            """))

    def test_unicode_body_defaults_to_utf8_encoding(self):
        # Issue 14291
        m = MIMEText('É testabc\n')
        self.assertEqual(str(m), textwrap.dedent("""\
            Content-Type: text/plain; charset="utf-8"
            MIME-Version: 1.0
            Content-Transfer-Encoding: base64

            w4kgdGVzdGFiYwo=
            """))
635
636
# Test the email.encoders module
class TestEncoders(unittest.TestCase):

    def test_EncodersEncode_base64(self):
        with openfile('PyBanner048.gif', 'rb') as fp:
            bindata = fp.read()
        # Use the directly imported class rather than reaching through the
        # implicitly loaded email.mime.image submodule.
        mimed = MIMEImage(bindata)
        base64ed = mimed.get_payload()
        # the transfer-encoded body lines should all be <=76 characters
        lines = base64ed.split('\n')
        self.assertLessEqual(max(len(x) for x in lines), 76)

    def test_encode_empty_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg['content-transfer-encoding'], '7bit')

    def test_default_cte(self):
        eq = self.assertEqual
        # 7bit data and the default us-ascii _charset
        msg = MIMEText('hello world')
        eq(msg['content-transfer-encoding'], '7bit')
        # Similar, but with 8bit data
        msg = MIMEText('hello \xf8 world')
        eq(msg['content-transfer-encoding'], 'base64')
        # And now with a different charset
        msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')

    def test_encode7or8bit(self):
        # Make sure a charset whose input character set is 8bit but
        # whose output character set is 7bit gets a transfer-encoding
        # of 7bit.
        eq = self.assertEqual
        msg = MIMEText('文', _charset='euc-jp')
        eq(msg['content-transfer-encoding'], '7bit')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000674
Ezio Melottib3aedd42010-11-20 19:04:17 +0000675
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000676# Test long header wrapping
677class TestLongHeaders(TestEmailBase):
R David Murray01581ee2011-04-18 10:04:34 -0400678
679 maxDiff = None
680
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000681 def test_split_long_continuation(self):
682 eq = self.ndiffAssertEqual
683 msg = email.message_from_string("""\
684Subject: bug demonstration
685\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
686\tmore text
687
688test
689""")
690 sfp = StringIO()
691 g = Generator(sfp)
692 g.flatten(msg)
693 eq(sfp.getvalue(), """\
694Subject: bug demonstration
695\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
696\tmore text
697
698test
699""")
700
701 def test_another_long_almost_unsplittable_header(self):
702 eq = self.ndiffAssertEqual
703 hstr = """\
704bug demonstration
705\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
706\tmore text"""
707 h = Header(hstr, continuation_ws='\t')
708 eq(h.encode(), """\
709bug demonstration
710\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
711\tmore text""")
712 h = Header(hstr.replace('\t', ' '))
713 eq(h.encode(), """\
714bug demonstration
715 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
716 more text""")
717
718 def test_long_nonstring(self):
719 eq = self.ndiffAssertEqual
720 g = Charset("iso-8859-1")
721 cz = Charset("iso-8859-2")
722 utf8 = Charset("utf-8")
723 g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
724 b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
725 b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
726 b'bef\xf6rdert. ')
727 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
728 b'd\xf9vtipu.. ')
729 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
730 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
731 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
732 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
733 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
734 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
735 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
736 '\u3044\u307e\u3059\u3002')
737 h = Header(g_head, g, header_name='Subject')
738 h.append(cz_head, cz)
739 h.append(utf8_head, utf8)
740 msg = Message()
741 msg['Subject'] = h
742 sfp = StringIO()
743 g = Generator(sfp)
744 g.flatten(msg)
745 eq(sfp.getvalue(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000746Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
747 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
748 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
749 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
750 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
751 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
752 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
753 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
754 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
755 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
756 =?utf-8?b?44CC?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000757
758""")
Guido van Rossum9604e662007-08-30 03:46:43 +0000759 eq(h.encode(maxlinelen=76), """\
760=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
761 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
762 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
763 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
764 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
765 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
766 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
767 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
768 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
769 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
770 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000771
772 def test_long_header_encode(self):
773 eq = self.ndiffAssertEqual
774 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
775 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
776 header_name='X-Foobar-Spoink-Defrobnit')
777 eq(h.encode(), '''\
778wasnipoop; giraffes="very-long-necked-animals";
779 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
780
781 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
782 eq = self.ndiffAssertEqual
783 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
784 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
785 header_name='X-Foobar-Spoink-Defrobnit',
786 continuation_ws='\t')
787 eq(h.encode(), '''\
788wasnipoop; giraffes="very-long-necked-animals";
789 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
790
791 def test_long_header_encode_with_tab_continuation(self):
792 eq = self.ndiffAssertEqual
793 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
794 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
795 header_name='X-Foobar-Spoink-Defrobnit',
796 continuation_ws='\t')
797 eq(h.encode(), '''\
798wasnipoop; giraffes="very-long-necked-animals";
799\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
800
R David Murray3a6152f2011-03-14 21:13:03 -0400801 def test_header_encode_with_different_output_charset(self):
802 h = Header('文', 'euc-jp')
803 self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=")
804
805 def test_long_header_encode_with_different_output_charset(self):
806 h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4'
807 b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4'
808 b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4'
809 b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp')
810 res = """\
811=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?=
812 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?="""
813 self.assertEqual(h.encode(), res)
814
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000815 def test_header_splitter(self):
816 eq = self.ndiffAssertEqual
817 msg = MIMEText('')
818 # It'd be great if we could use add_header() here, but that doesn't
819 # guarantee an order of the parameters.
820 msg['X-Foobar-Spoink-Defrobnit'] = (
821 'wasnipoop; giraffes="very-long-necked-animals"; '
822 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
823 sfp = StringIO()
824 g = Generator(sfp)
825 g.flatten(msg)
826 eq(sfp.getvalue(), '''\
827Content-Type: text/plain; charset="us-ascii"
828MIME-Version: 1.0
829Content-Transfer-Encoding: 7bit
830X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
831 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
832
833''')
834
def test_no_semis_header_splitter(self):
    # With no ';' or ',' in the value, the fold happens at whitespace.
    message = Message()
    message['From'] = 'test@dom.ain'
    message['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
    message.set_payload('Test')
    buffer = StringIO()
    Generator(buffer).flatten(message)
    expected = (
        'From: test@dom.ain\n'
        'References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>\n'
        ' <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>\n'
        '\n'
        'Test')
    self.ndiffAssertEqual(buffer.getvalue(), expected)
850
R David Murray7da4db12011-04-07 20:37:17 -0400851 def test_last_split_chunk_does_not_fit(self):
852 eq = self.ndiffAssertEqual
853 h = Header('Subject: the first part of this is short, but_the_second'
854 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
855 '_all_by_itself')
856 eq(h.encode(), """\
857Subject: the first part of this is short,
858 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
859
860 def test_splittable_leading_char_followed_by_overlong_unsplitable(self):
861 eq = self.ndiffAssertEqual
862 h = Header(', but_the_second'
863 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
864 '_all_by_itself')
865 eq(h.encode(), """\
866,
867 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
868
869 def test_multiple_splittable_leading_char_followed_by_overlong_unsplitable(self):
870 eq = self.ndiffAssertEqual
871 h = Header(', , but_the_second'
872 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
873 '_all_by_itself')
874 eq(h.encode(), """\
875, ,
876 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
877
878 def test_trailing_splitable_on_overlong_unsplitable(self):
879 eq = self.ndiffAssertEqual
880 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
881 'be_on_a_line_all_by_itself;')
882 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_should_"
883 "be_on_a_line_all_by_itself;")
884
885 def test_trailing_splitable_on_overlong_unsplitable_with_leading_splitable(self):
886 eq = self.ndiffAssertEqual
887 h = Header('; '
888 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
R David Murray01581ee2011-04-18 10:04:34 -0400889 'be_on_a_line_all_by_itself; ')
R David Murray7da4db12011-04-07 20:37:17 -0400890 eq(h.encode(), """\
891;
R David Murray01581ee2011-04-18 10:04:34 -0400892 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
R David Murray7da4db12011-04-07 20:37:17 -0400893
R David Murraye1292a22011-04-07 20:54:03 -0400894 def test_long_header_with_multiple_sequential_split_chars(self):
R David Murraye1292a22011-04-07 20:54:03 -0400895 eq = self.ndiffAssertEqual
896 h = Header('This is a long line that has two whitespaces in a row. '
897 'This used to cause truncation of the header when folded')
898 eq(h.encode(), """\
899This is a long line that has two whitespaces in a row. This used to cause
900 truncation of the header when folded""")
901
R David Murray01581ee2011-04-18 10:04:34 -0400902 def test_splitter_split_on_punctuation_only_if_fws(self):
903 eq = self.ndiffAssertEqual
904 h = Header('thisverylongheaderhas;semicolons;and,commas,but'
905 'they;arenotlegal;fold,points')
906 eq(h.encode(), "thisverylongheaderhas;semicolons;and,commas,butthey;"
907 "arenotlegal;fold,points")
908
909 def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self):
910 eq = self.ndiffAssertEqual
911 h = Header('this is a test where we need to have more than one line '
912 'before; our final line that is just too big to fit;; '
913 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
914 'be_on_a_line_all_by_itself;')
915 eq(h.encode(), """\
916this is a test where we need to have more than one line before;
917 our final line that is just too big to fit;;
918 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""")
919
920 def test_overlong_last_part_followed_by_split_point(self):
921 eq = self.ndiffAssertEqual
922 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
923 'be_on_a_line_all_by_itself ')
924 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_"
925 "should_be_on_a_line_all_by_itself ")
926
927 def test_multiline_with_overlong_parts_separated_by_two_split_points(self):
928 eq = self.ndiffAssertEqual
929 h = Header('this_is_a__test_where_we_need_to_have_more_than_one_line_'
930 'before_our_final_line_; ; '
931 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
932 'be_on_a_line_all_by_itself; ')
933 eq(h.encode(), """\
934this_is_a__test_where_we_need_to_have_more_than_one_line_before_our_final_line_;
935 ;
936 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
937
938 def test_multiline_with_overlong_last_part_followed_by_split_point(self):
939 eq = self.ndiffAssertEqual
940 h = Header('this is a test where we need to have more than one line '
941 'before our final line; ; '
942 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
943 'be_on_a_line_all_by_itself; ')
944 eq(h.encode(), """\
945this is a test where we need to have more than one line before our final line;
946 ;
947 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
948
def test_long_header_with_whitespace_runs(self):
    # Runs of whitespace inside a long header are expected to survive
    # folding intact.  Each id carries two trailing spaces, so once joined
    # with SPACE consecutive ids are three spaces apart and the value ends
    # in two spaces (the \x20\x20 below).  With single-space padding the
    # input could never produce that two-space tail.
    eq = self.ndiffAssertEqual
    msg = Message()
    msg['From'] = 'test@dom.ain'
    msg['References'] = SPACE.join(['<foo@dom.ain>  '] * 10)
    msg.set_payload('Test')
    sfp = StringIO()
    Generator(sfp).flatten(msg)
    eq(sfp.getvalue(),
       'From: test@dom.ain\n'
       'References: <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>\n'
       '   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>\n'
       '   <foo@dom.ain>   <foo@dom.ain>\x20\x20\n'
       '\n'
       'Test')
965
def test_long_run_with_semi_header_splitter(self):
    # A long whitespace-separated run ending in '; abc': the folds are
    # expected at whitespace, not at the single ';' near the end.
    message = Message()
    message['From'] = 'test@dom.ain'
    message['References'] = SPACE.join(['<foo@dom.ain>'] * 10) + '; abc'
    message.set_payload('Test')
    buffer = StringIO()
    Generator(buffer).flatten(message)
    expected = (
        'From: test@dom.ain\n'
        'References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>\n'
        ' <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>\n'
        ' <foo@dom.ain>; abc\n'
        '\n'
        'Test')
    self.ndiffAssertEqual(buffer.getvalue(), expected)
982
983 def test_splitter_split_on_punctuation_only_if_fws(self):
984 eq = self.ndiffAssertEqual
985 msg = Message()
986 msg['From'] = 'test@dom.ain'
987 msg['References'] = ('thisverylongheaderhas;semicolons;and,commas,but'
988 'they;arenotlegal;fold,points')
989 msg.set_payload('Test')
990 sfp = StringIO()
991 g = Generator(sfp)
992 g.flatten(msg)
993 # XXX the space after the header should not be there.
994 eq(sfp.getvalue(), """\
995From: test@dom.ain
996References:\x20
997 thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points
998
999Test""")
1000
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001001 def test_no_split_long_header(self):
1002 eq = self.ndiffAssertEqual
1003 hstr = 'References: ' + 'x' * 80
Guido van Rossum9604e662007-08-30 03:46:43 +00001004 h = Header(hstr)
1005 # These come on two lines because Headers are really field value
1006 # classes and don't really know about their field names.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001007 eq(h.encode(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +00001008References:
1009 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
1010 h = Header('x' * 80)
1011 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001012
1013 def test_splitting_multiple_long_lines(self):
1014 eq = self.ndiffAssertEqual
1015 hstr = """\
1016from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1017\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1018\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1019"""
1020 h = Header(hstr, continuation_ws='\t')
1021 eq(h.encode(), """\
1022from babylon.socal-raves.org (localhost [127.0.0.1]);
1023 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1024 for <mailman-admin@babylon.socal-raves.org>;
1025 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1026\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1027 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1028 for <mailman-admin@babylon.socal-raves.org>;
1029 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1030\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1031 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1032 for <mailman-admin@babylon.socal-raves.org>;
1033 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")
1034
1035 def test_splitting_first_line_only_is_long(self):
1036 eq = self.ndiffAssertEqual
1037 hstr = """\
1038from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
1039\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1040\tid 17k4h5-00034i-00
1041\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
1042 h = Header(hstr, maxlinelen=78, header_name='Received',
1043 continuation_ws='\t')
1044 eq(h.encode(), """\
1045from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
1046 helo=cthulhu.gerg.ca)
1047\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1048\tid 17k4h5-00034i-00
1049\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")
1050
1051 def test_long_8bit_header(self):
1052 eq = self.ndiffAssertEqual
1053 msg = Message()
1054 h = Header('Britische Regierung gibt', 'iso-8859-1',
1055 header_name='Subject')
1056 h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
Guido van Rossum9604e662007-08-30 03:46:43 +00001057 eq(h.encode(maxlinelen=76), """\
1058=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1059 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001060 msg['Subject'] = h
Guido van Rossum9604e662007-08-30 03:46:43 +00001061 eq(msg.as_string(maxheaderlen=76), """\
1062Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1063 =?iso-8859-1?q?hore-Windkraftprojekte?=
1064
1065""")
1066 eq(msg.as_string(maxheaderlen=0), """\
1067Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001068
1069""")
1070
1071 def test_long_8bit_header_no_charset(self):
1072 eq = self.ndiffAssertEqual
1073 msg = Message()
Barry Warsaw8c571042007-08-30 19:17:18 +00001074 header_string = ('Britische Regierung gibt gr\xfcnes Licht '
1075 'f\xfcr Offshore-Windkraftprojekte '
1076 '<a-very-long-address@example.com>')
1077 msg['Reply-To'] = header_string
R David Murray7441a7a2012-03-14 02:59:51 -04001078 eq(msg.as_string(maxheaderlen=78), """\
1079Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1080 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
1081
1082""")
Barry Warsaw8c571042007-08-30 19:17:18 +00001083 msg = Message()
R David Murray7441a7a2012-03-14 02:59:51 -04001084 msg['Reply-To'] = Header(header_string,
Barry Warsaw8c571042007-08-30 19:17:18 +00001085 header_name='Reply-To')
1086 eq(msg.as_string(maxheaderlen=78), """\
1087Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1088 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001089
1090""")
1091
1092 def test_long_to_header(self):
1093 eq = self.ndiffAssertEqual
1094 to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
R David Murray01581ee2011-04-18 10:04:34 -04001095 '<someone@eecs.umich.edu>, '
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001096 '"Someone Test #B" <someone@umich.edu>, '
1097 '"Someone Test #C" <someone@eecs.umich.edu>, '
1098 '"Someone Test #D" <someone@eecs.umich.edu>')
1099 msg = Message()
1100 msg['To'] = to
1101 eq(msg.as_string(maxheaderlen=78), '''\
Guido van Rossum9604e662007-08-30 03:46:43 +00001102To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
Barry Warsaw70d61ce2009-03-30 23:12:30 +00001103 "Someone Test #B" <someone@umich.edu>,
Guido van Rossum9604e662007-08-30 03:46:43 +00001104 "Someone Test #C" <someone@eecs.umich.edu>,
1105 "Someone Test #D" <someone@eecs.umich.edu>
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001106
1107''')
1108
1109 def test_long_line_after_append(self):
1110 eq = self.ndiffAssertEqual
1111 s = 'This is an example of string which has almost the limit of header length.'
1112 h = Header(s)
1113 h.append('Add another line.')
Guido van Rossum9604e662007-08-30 03:46:43 +00001114 eq(h.encode(maxlinelen=76), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001115This is an example of string which has almost the limit of header length.
1116 Add another line.""")
1117
1118 def test_shorter_line_with_append(self):
1119 eq = self.ndiffAssertEqual
1120 s = 'This is a shorter line.'
1121 h = Header(s)
1122 h.append('Add another sentence. (Surprise?)')
1123 eq(h.encode(),
1124 'This is a shorter line. Add another sentence. (Surprise?)')
1125
1126 def test_long_field_name(self):
1127 eq = self.ndiffAssertEqual
1128 fn = 'X-Very-Very-Very-Long-Header-Name'
Guido van Rossum9604e662007-08-30 03:46:43 +00001129 gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
1130 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
1131 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
1132 'bef\xf6rdert. ')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001133 h = Header(gs, 'iso-8859-1', header_name=fn)
1134 # BAW: this seems broken because the first line is too long
Guido van Rossum9604e662007-08-30 03:46:43 +00001135 eq(h.encode(maxlinelen=76), """\
1136=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
1137 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
1138 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
1139 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001140
1141 def test_long_received_header(self):
1142 h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
1143 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
1144 'Wed, 05 Mar 2003 18:10:18 -0700')
1145 msg = Message()
1146 msg['Received-1'] = Header(h, continuation_ws='\t')
1147 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +00001148 # This should be splitting on spaces not semicolons.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001149 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001150Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1151 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
Barry Warsawbef9d212007-08-31 10:55:37 +00001152 Wed, 05 Mar 2003 18:10:18 -0700
R David Murray01581ee2011-04-18 10:04:34 -04001153Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1154 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
Barry Warsawbef9d212007-08-31 10:55:37 +00001155 Wed, 05 Mar 2003 18:10:18 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001156
1157""")
1158
1159 def test_string_headerinst_eq(self):
1160 h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
1161 'tu-muenchen.de> (David Bremner\'s message of '
1162 '"Thu, 6 Mar 2003 13:58:21 +0100")')
1163 msg = Message()
1164 msg['Received-1'] = Header(h, header_name='Received-1',
1165 continuation_ws='\t')
1166 msg['Received-2'] = h
R David Murray01581ee2011-04-18 10:04:34 -04001167 # XXX The space after the ':' should not be there.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001168 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001169Received-1:\x20
1170 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1171 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
1172Received-2:\x20
1173 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1174 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001175
1176""")
1177
1178 def test_long_unbreakable_lines_with_continuation(self):
1179 eq = self.ndiffAssertEqual
1180 msg = Message()
1181 t = """\
1182iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1183 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
1184 msg['Face-1'] = t
1185 msg['Face-2'] = Header(t, header_name='Face-2')
R David Murray01581ee2011-04-18 10:04:34 -04001186 msg['Face-3'] = ' ' + t
Barry Warsawbef9d212007-08-31 10:55:37 +00001187 # XXX This splitting is all wrong. It the first value line should be
R David Murray01581ee2011-04-18 10:04:34 -04001188 # snug against the field name or the space after the header not there.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001189 eq(msg.as_string(maxheaderlen=78), """\
Barry Warsawc5a6a302007-08-31 11:19:21 +00001190Face-1:\x20
Barry Warsaw70d61ce2009-03-30 23:12:30 +00001191 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001192 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Barry Warsawc5a6a302007-08-31 11:19:21 +00001193Face-2:\x20
Barry Warsawbef9d212007-08-31 10:55:37 +00001194 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001195 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
R David Murray01581ee2011-04-18 10:04:34 -04001196Face-3:\x20
1197 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1198 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001199
1200""")
1201
1202 def test_another_long_multiline_header(self):
1203 eq = self.ndiffAssertEqual
1204 m = ('Received: from siimage.com '
1205 '([172.25.1.3]) by zima.siliconimage.com with '
Guido van Rossum9604e662007-08-30 03:46:43 +00001206 'Microsoft SMTPSVC(5.0.2195.4905); '
1207 'Wed, 16 Oct 2002 07:41:11 -0700')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001208 msg = email.message_from_string(m)
1209 eq(msg.as_string(maxheaderlen=78), '''\
R David Murray01581ee2011-04-18 10:04:34 -04001210Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with
1211 Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 2002 07:41:11 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001212
1213''')
1214
1215 def test_long_lines_with_different_header(self):
1216 eq = self.ndiffAssertEqual
1217 h = ('List-Unsubscribe: '
1218 '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
1219 ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
1220 '?subject=unsubscribe>')
1221 msg = Message()
1222 msg['List'] = h
1223 msg['List'] = Header(h, header_name='List')
1224 eq(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001225List: List-Unsubscribe:
1226 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +00001227 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
R David Murray01581ee2011-04-18 10:04:34 -04001228List: List-Unsubscribe:
1229 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +00001230 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001231
1232""")
1233
R. David Murray6f0022d2011-01-07 21:57:25 +00001234 def test_long_rfc2047_header_with_embedded_fws(self):
1235 h = Header(textwrap.dedent("""\
1236 We're going to pretend this header is in a non-ascii character set
1237 \tto see if line wrapping with encoded words and embedded
1238 folding white space works"""),
1239 charset='utf-8',
1240 header_name='Test')
1241 self.assertEqual(h.encode()+'\n', textwrap.dedent("""\
1242 =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
1243 =?utf-8?q?cter_set?=
1244 =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
1245 =?utf-8?q?_folding_white_space_works?=""")+'\n')
1246
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001247
Ezio Melottib3aedd42010-11-20 19:04:17 +00001248
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001249# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    def setUp(self):
        # A message whose body starts with the word "From ".
        message = Message()
        message['From'] = 'aaa@bbb.org'
        message.set_payload('From the desk of A.A.A.:\n'
                            'Blah blah blah\n')
        self.msg = message

    def _flatten(self, mangle):
        # Serialise self.msg with the given mangle_from_ setting.
        buffer = StringIO()
        Generator(buffer, mangle_from_=mangle).flatten(self.msg)
        return buffer.getvalue()

    def test_mangled_from(self):
        # With mangling on, the body line gains a leading '>'.
        self.assertEqual(self._flatten(True),
                         'From: aaa@bbb.org\n'
                         '\n'
                         '>From the desk of A.A.A.:\n'
                         'Blah blah blah\n')

    def test_dont_mangle_from(self):
        # With mangling off, the body is written unchanged.
        self.assertEqual(self._flatten(False),
                         'From: aaa@bbb.org\n'
                         '\n'
                         'From the desk of A.A.A.:\n'
                         'Blah blah blah\n')
1280
1281
Ezio Melottib3aedd42010-11-20 19:04:17 +00001282
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001283# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    def setUp(self):
        # Build a MIMEAudio part from the raw bytes of the sample file.
        with openfile('audiotest.au', 'rb') as fp:
            self._audiodata = fp.read()
        self._au = MIMEAudio(self._audiodata)

    def test_guess_minor_type(self):
        self.assertEqual(self._au.get_content_type(), 'audio/basic')

    def test_encoding(self):
        # Decoding the payload as base64 must give back the original bytes.
        payload = self._au.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._audiodata)

    def test_checkSetMinor(self):
        au = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(au.get_content_type(), 'audio/fish')

    def test_add_header(self):
        eq = self.assertEqual
        self._au.add_header('Content-Disposition', 'attachment',
                            filename='audiotest.au')
        eq(self._au['content-disposition'],
           'attachment; filename="audiotest.au"')
        eq(self._au.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'audiotest.au')])
        eq(self._au.get_param('filename', header='content-disposition'),
           'audiotest.au')
        # A fresh list is a unique sentinel; assertIs checks identity and
        # reports both objects on failure, unlike assertTrue(a is b).
        missing = []
        eq(self._au.get_param('attachment', header='content-disposition'), '')
        self.assertIs(self._au.get_param('foo', failobj=missing,
                                         header='content-disposition'),
                      missing)
        # Try some missing stuff
        self.assertIs(self._au.get_param('foobar', missing), missing)
        self.assertIs(self._au.get_param('attachment', missing,
                                         header='foobar'),
                      missing)
1321
1322
Ezio Melottib3aedd42010-11-20 19:04:17 +00001323
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001324# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    def setUp(self):
        # Build a MIMEImage part from the raw bytes of the sample GIF.
        with openfile('PyBanner048.gif', 'rb') as fp:
            self._imgdata = fp.read()
        self._im = MIMEImage(self._imgdata)

    def test_guess_minor_type(self):
        self.assertEqual(self._im.get_content_type(), 'image/gif')

    def test_encoding(self):
        # Decoding the payload as base64 must give back the original bytes.
        payload = self._im.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._imgdata)

    def test_checkSetMinor(self):
        im = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(im.get_content_type(), 'image/fish')

    def test_add_header(self):
        eq = self.assertEqual
        self._im.add_header('Content-Disposition', 'attachment',
                            filename='dingusfish.gif')
        eq(self._im['content-disposition'],
           'attachment; filename="dingusfish.gif"')
        eq(self._im.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'dingusfish.gif')])
        eq(self._im.get_param('filename', header='content-disposition'),
           'dingusfish.gif')
        # A fresh list is a unique sentinel; assertIs checks identity and
        # reports both objects on failure, unlike assertTrue(a is b).
        missing = []
        eq(self._im.get_param('attachment', header='content-disposition'), '')
        self.assertIs(self._im.get_param('foo', failobj=missing,
                                         header='content-disposition'),
                      missing)
        # Try some missing stuff
        self.assertIs(self._im.get_param('foobar', missing), missing)
        self.assertIs(self._im.get_param('attachment', missing,
                                         header='foobar'),
                      missing)
1362
1363
Ezio Melottib3aedd42010-11-20 19:04:17 +00001364
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001365# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    def test_headers(self):
        # Default subtype and transfer encoding for raw bytes.
        part = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        self.assertEqual(part.get_content_type(), 'application/octet-stream')
        self.assertEqual(part['content-transfer-encoding'], 'base64')

    def test_body(self):
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        part = MIMEApplication(bytesdata)
        # whitespace in the cte encoded block is RFC-irrelevant.
        encoded = part.get_payload().strip()
        self.assertEqual(encoded, '+vv8/f7/')
        # Decoding must round-trip to the original bytes.
        self.assertEqual(part.get_payload(decode=True), bytesdata)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001380
1381
Ezio Melottib3aedd42010-11-20 19:04:17 +00001382
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001383# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    def setUp(self):
        self._msg = MIMEText('hello there')

    def test_types(self):
        eq = self.assertEqual
        eq(self._msg.get_content_type(), 'text/plain')
        eq(self._msg.get_param('charset'), 'us-ascii')
        # A fresh list is a unique sentinel; assertIs checks identity and
        # reports both objects on failure, unlike assertTrue(a is b).
        missing = []
        self.assertIs(self._msg.get_param('foobar', missing), missing)
        self.assertIs(self._msg.get_param('charset', missing,
                                          header='foobar'),
                      missing)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there')
        eq(msg.get_charset(), 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # assertIn shows the searched text on failure.
        self.assertIn('hello there', msg.as_string())

    def test_utf8_input(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        eq = self.assertEqual
        msg = MIMEText(teststr, _charset='utf-8')
        eq(msg.get_charset().output_charset, 'utf-8')
        eq(msg['content-type'], 'text/plain; charset="utf-8"')
        eq(msg.get_payload(decode=True), teststr.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
                   "will fix in email6")
    def test_utf8_input_no_charset(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
1434
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001435
Ezio Melottib3aedd42010-11-20 19:04:17 +00001436
# Test complicated multipart/* messages
class TestMultipart(TestEmailBase):
    def setUp(self):
        # Build a two-part multipart/mixed message (text intro + GIF image)
        # that the hierarchy test inspects.
        with openfile('PyBanner048.gif', 'rb') as fp:
            data = fp.read()
        container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
        image = MIMEImage(data, name='dingusfish.gif')
        image.add_header('content-disposition', 'attachment',
                         filename='dingusfish.gif')
        intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
        container.attach(intro)
        container.attach(image)
        container['From'] = 'Barry <barry@digicool.com>'
        container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
        container['Subject'] = 'Here is your dingus fish'

        now = 987809702.54848599
        timetuple = time.localtime(now)
        # The last struct_time field is the DST flag; it selects which of
        # the two module-level offsets applies to this timestamp.
        if timetuple[-1] == 0:
            tzsecs = time.timezone
        else:
            tzsecs = time.altzone
        # time.timezone/altzone count seconds *west* of UTC, so a positive
        # value corresponds to a negative numeric zone in the Date header.
        if tzsecs > 0:
            sign = '-'
        else:
            sign = '+'
        # Format the magnitude as HHMM.  Working from the absolute value
        # avoids a second sign in the output, and divmod keeps half-hour
        # zones correct (19800s -> 0530).
        hours, minutes = divmod(abs(tzsecs) // 60, 60)
        tzoffset = ' %s%02d%02d' % (sign, hours, minutes)
        container['Date'] = time.strftime(
            '%a, %d %b %Y %H:%M:%S', timetuple) + tzoffset
        self._msg = container
        self._im = image
        self._txt = intro

    def test_hierarchy(self):
        # convenience
        eq = self.assertEqual
        same = self.assertIs
        # tests
        m = self._msg
        self.assertTrue(m.is_multipart())
        eq(m.get_content_type(), 'multipart/mixed')
        eq(len(m.get_payload()), 2)
        self.assertRaises(IndexError, m.get_payload, 2)
        m0 = m.get_payload(0)
        m1 = m.get_payload(1)
        # The very objects attached in setUp must come back, in order.
        same(m0, self._txt)
        same(m1, self._im)
        eq(m.get_payload(), [m0, m1])
        self.assertFalse(m0.is_multipart())
        self.assertFalse(m1.is_multipart())

    def test_empty_multipart_idempotent(self):
        text = """\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY


--BOUNDARY--
"""
        msg = Parser().parsestr(text)
        self.ndiffAssertEqual(text, msg.as_string())

    def test_no_parts_in_a_multipart_with_none_epilogue(self):
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY

--BOUNDARY--''')

    def test_no_parts_in_a_multipart_with_empty_epilogue(self):
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = ''
        outer.epilogue = ''
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY

--BOUNDARY--
''')

    def test_one_part_in_a_multipart(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.set_boundary('BOUNDARY')
        msg = MIMEText('hello world')
        outer.attach(msg)
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_preamble(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        # An empty (but not None) preamble shows up as an extra blank line
        # before the first boundary.
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_preamble(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        # An empty epilogue yields a newline after the closing boundary.
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = '\n'
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--

''')

    def test_message_external_body(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_36.txt')
        eq(len(msg.get_payload()), 2)
        msg1 = msg.get_payload(1)
        eq(msg1.get_content_type(), 'multipart/alternative')
        eq(len(msg1.get_payload()), 2)
        for subpart in msg1.get_payload():
            eq(subpart.get_content_type(), 'message/external-body')
            eq(len(subpart.get_payload()), 1)
            subsubpart = subpart.get_payload(0)
            eq(subsubpart.get_content_type(), 'text/plain')

    def test_double_boundary(self):
        # msg_37.txt is a multipart that contains two dash-boundary's in a
        # row.  Our interpretation of RFC 2046 calls for ignoring the second
        # and subsequent boundaries.
        msg = self._msgobj('msg_37.txt')
        self.assertEqual(len(msg.get_payload()), 3)

    def test_nested_inner_contains_outer_boundary(self):
        eq = self.ndiffAssertEqual
        # msg_38.txt has an inner part that contains outer boundaries.  My
        # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
        # these are illegal and should be interpreted as unterminated inner
        # parts.
        msg = self._msgobj('msg_38.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): _structure indents four spaces per nesting level;
        # the nesting shown here should be confirmed against msg_38.txt.
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
            text/plain
        text/plain
    text/plain
    text/plain
""")

    def test_nested_with_same_boundary(self):
        eq = self.ndiffAssertEqual
        # msg 39.txt is similarly evil in that it's got inner parts that use
        # the same boundary as outer parts.  Again, I believe the way this is
        # parsed is closest to the spirit of RFC 2046
        msg = self._msgobj('msg_39.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): _structure indents four spaces per nesting level;
        # the nesting shown here should be confirmed against msg_39.txt.
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
        application/octet-stream
        application/octet-stream
    text/plain
""")

    def test_boundary_in_non_multipart(self):
        # A boundary parameter on a non-multipart type must not trigger
        # multipart parsing; the body round-trips untouched.
        msg = self._msgobj('msg_40.txt')
        self.assertEqual(msg.as_string(), '''\
MIME-Version: 1.0
Content-Type: text/html; boundary="--961284236552522269"

----961284236552522269
Content-Type: text/html;
Content-Transfer-Encoding: 7Bit

<html></html>

----961284236552522269--
''')

    def test_boundary_with_leading_space(self):
        eq = self.assertEqual
        msg = email.message_from_string('''\
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary=" XXXX"

-- XXXX
Content-Type: text/plain


-- XXXX
Content-Type: text/plain

-- XXXX--
''')
        self.assertTrue(msg.is_multipart())
        # The leading blank is part of the boundary and must be preserved.
        eq(msg.get_boundary(), ' XXXX')
        eq(len(msg.get_payload()), 2)

    def test_boundary_without_trailing_newline(self):
        m = Parser().parsestr("""\
Content-Type: multipart/mixed; boundary="===============0012394164=="
MIME-Version: 1.0

--===============0012394164==
Content-Type: image/file1.jpg
MIME-Version: 1.0
Content-Transfer-Encoding: base64

YXNkZg==
--===============0012394164==--""")
        self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001809
1810
Ezio Melottib3aedd42010-11-20 19:04:17 +00001811
# Test some badly formatted messages
class TestNonConformant(TestEmailBase):

    def test_parse_missing_minor_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_14.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')

    # test_defect_handling
    def test_same_boundary_inner_outer(self):
        msg = self._msgobj('msg_15.txt')
        # XXX We can probably eventually do better
        inner = msg.get_payload(0)
        self.assertTrue(hasattr(inner, 'defects'))
        self.assertEqual(len(inner.defects), 1)
        self.assertIsInstance(inner.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    # test_defect_handling
    def test_multipart_no_boundary(self):
        msg = self._msgobj('msg_25.txt')
        # Without a usable boundary the body stays one unparsed string.
        self.assertIsInstance(msg.get_payload(), str)
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    # Template for the Content-Transfer-Encoding tests below; the {}
    # placeholder receives an optional extra header line.  The boundary
    # parameter line is indented deeper than the rest so that, after
    # dedent(), it still begins with whitespace and is therefore parsed
    # as a continuation of the Content-Type header.
    multipart_msg = textwrap.dedent("""\
        Date: Wed, 14 Nov 2007 12:56:23 GMT
        From: foo@bar.invalid
        To: foo@bar.invalid
        Subject: Content-Transfer-Encoding: base64 and multipart
        MIME-Version: 1.0
        Content-Type: multipart/mixed;
            boundary="===============3344438784458119861=="{}

        --===============3344438784458119861==
        Content-Type: text/plain

        Test message

        --===============3344438784458119861==
        Content-Type: application/octet-stream
        Content-Transfer-Encoding: base64

        YWJj

        --===============3344438784458119861==--
        """)

    # test_defect_handling
    def test_multipart_invalid_cte(self):
        msg = self._str_msg(
            self.multipart_msg.format("\nContent-Transfer-Encoding: base64"))
        self.assertEqual(len(msg.defects), 1)
        self.assertIsInstance(msg.defects[0],
            errors.InvalidMultipartContentTransferEncodingDefect)

    # test_defect_handling
    def test_multipart_no_cte_no_defect(self):
        msg = self._str_msg(self.multipart_msg.format(''))
        self.assertEqual(len(msg.defects), 0)

    # test_defect_handling
    def test_multipart_valid_cte_no_defect(self):
        # 'BINary' is deliberately mixed-case.
        for cte in ('7bit', '8bit', 'BINary'):
            msg = self._str_msg(
                self.multipart_msg.format(
                    "\nContent-Transfer-Encoding: {}".format(cte)))
            self.assertEqual(len(msg.defects), 0)

    def test_invalid_content_type(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        msg = Message()
        # RFC 2045, $5.2 says invalid yields text/plain
        msg['Content-Type'] = 'text'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Clear the old value and try something /really/ invalid
        del msg['content-type']
        msg['Content-Type'] = 'foo'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Still, make sure that the message is idempotently generated
        s = StringIO()
        g = Generator(s)
        g.flatten(msg)
        neq(s.getvalue(), 'Content-Type: foo\n\n')

    def test_no_start_boundary(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_31.txt')
        eq(msg.get_payload(), """\
--BOUNDARY
Content-Type: text/plain

message 1

--BOUNDARY
Content-Type: text/plain

message 2

--BOUNDARY--
""")

    def test_no_separating_blank_line(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_35.txt')
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: here's something interesting

counter to RFC 2822, there's no separating newline here
""")

    # test_defect_handling
    def test_lying_multipart(self):
        msg = self._msgobj('msg_41.txt')
        self.assertTrue(hasattr(msg, 'defects'))
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    # test_defect_handling
    def test_missing_start_boundary(self):
        outer = self._msgobj('msg_42.txt')
        # The message structure is:
        #
        # multipart/mixed
        #    text/plain
        #    message/rfc822
        #        multipart/mixed [*]
        #
        # [*] This message is missing its start boundary
        bad = outer.get_payload(1).get_payload(0)
        self.assertEqual(len(bad.defects), 1)
        self.assertIsInstance(bad.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    # test_defect_handling
    def test_first_line_is_continuation_header(self):
        eq = self.assertEqual
        m = ' Line 1\nSubject: test\n\nbody'
        msg = email.message_from_string(m)
        eq(msg.keys(), ['Subject'])
        eq(msg.get_payload(), 'body')
        eq(len(msg.defects), 1)
        self.assertDefectsEqual(msg.defects,
                                 [errors.FirstHeaderLineIsContinuationDefect])
        # The defect records the offending line verbatim.
        eq(msg.defects[0].line, ' Line 1\n')

    # test_defect_handling
    def test_missing_header_body_separator(self):
        # Our heuristic if we see a line that doesn't look like a header (no
        # leading whitespace but no ':') is to assume that the blank line that
        # separates the header from the body is missing, and to stop parsing
        # headers and start parsing the body.
        msg = self._str_msg('Subject: test\nnot a header\nTo: abc\n\nb\n')
        self.assertEqual(msg.keys(), ['Subject'])
        self.assertEqual(msg.get_payload(), 'not a header\nTo: abc\n\nb\n')
        self.assertDefectsEqual(msg.defects,
                                [errors.MissingHeaderBodySeparatorDefect])
1987
Ezio Melottib3aedd42010-11-20 19:04:17 +00001988
# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    def test_rfc2047_multiline(self):
        eq = self.assertEqual
        s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
        dh = decode_header(s)
        eq(dh, [
            (b'Re: ', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            (b' baz foo bar ', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
        header = make_header(dh)
        eq(str(header),
           'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""")

    def test_whitespace_keeper_unicode(self):
        eq = self.assertEqual
        s = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        dh = decode_header(s)
        eq(dh, [(b'Andr\xe9', 'iso-8859-1'),
                (b' Pirard <pirard@dom.ain>', None)])
        header = str(make_header(dh))
        eq(header, 'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_keeper_unicode_2(self):
        eq = self.assertEqual
        s = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?='
        dh = decode_header(s)
        eq(dh, [(b'The ', None), (b'quick brown fox', 'iso-8859-1'),
                (b' jumped over the ', None), (b'lazy dog', 'iso-8859-1')])
        hu = str(make_header(dh))
        eq(hu, 'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        # Encoded words glued directly onto plain text are still decoded.
        s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        dh = decode_header(s)
        self.assertEqual(dh, [(b'Sm', None), (b'\xf6', 'iso-8859-1'),
                              (b'rg', None), (b'\xe5', 'iso-8859-1'),
                              (b'sbord', None)])

    def test_rfc2047_with_whitespace(self):
        s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        dh = decode_header(s)
        self.assertEqual(dh, [(b'Sm ', None), (b'\xf6', 'iso-8859-1'),
                              (b' rg ', None), (b'\xe5', 'iso-8859-1'),
                              (b' sbord', None)])

    def test_rfc2047_B_bad_padding(self):
        s = '=?iso-8859-1?B?%s?='
        data = [                                # only test complete bytes
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi')
          ]
        for q, a in data:
            dh = decode_header(s % q)
            self.assertEqual(dh, [(a, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004.
        s = '=?iso-8659-1?Q?andr=e9=zz?='
        self.assertEqual(decode_header(s),
                         [(b'andr\xe9=zz', 'iso-8659-1')])

    def test_rfc2047_rfc2047_1(self):
        # 1st testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_2(self):
        # 2nd testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?= b)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a', 'iso-8859-1'), (b' b)', None)])

    def test_rfc2047_rfc2047_3(self):
        # 3rd testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_4(self):
        # 4th testcase at end of rfc2047: unlike the 3rd one, the two
        # encoded words are separated by more than a single space.
        s = '(=?ISO-8859-1?Q?a?=  =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_5a(self):
        # 5th testcase at end of rfc2047 newline is \r\n
        s = '(=?ISO-8859-1?Q?a?=\r\n =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_5b(self):
        # 5th testcase at end of rfc2047 newline is \n
        s = '(=?ISO-8859-1?Q?a?=\n =?ISO-8859-1?Q?b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'ab', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_6(self):
        # 6th testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a_b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a b', 'iso-8859-1'), (b')', None)])

    def test_rfc2047_rfc2047_7(self):
        # 7th testcase at end of rfc2047
        s = '(=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?=)'
        self.assertEqual(decode_header(s),
            [(b'(', None), (b'a', 'iso-8859-1'), (b' b', 'iso-8859-2'),
             (b')', None)])
        # Re-encoding the decoded parts yields the lower-cased input.
        self.assertEqual(make_header(decode_header(s)).encode(), s.lower())
        self.assertEqual(str(make_header(decode_header(s))), '(a b)')
2106
Ezio Melottib3aedd42010-11-20 19:04:17 +00002107
# Test the MIMEMessage class
2109class TestMIMEMessage(TestEmailBase):
2110 def setUp(self):
2111 with openfile('msg_11.txt') as fp:
2112 self._text = fp.read()
2113
2114 def test_type_error(self):
2115 self.assertRaises(TypeError, MIMEMessage, 'a plain string')
2116
2117 def test_valid_argument(self):
2118 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002119 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002120 subject = 'A sub-message'
2121 m = Message()
2122 m['Subject'] = subject
2123 r = MIMEMessage(m)
2124 eq(r.get_content_type(), 'message/rfc822')
2125 payload = r.get_payload()
2126 unless(isinstance(payload, list))
2127 eq(len(payload), 1)
2128 subpart = payload[0]
2129 unless(subpart is m)
2130 eq(subpart['subject'], subject)
2131
2132 def test_bad_multipart(self):
2133 eq = self.assertEqual
2134 msg1 = Message()
2135 msg1['Subject'] = 'subpart 1'
2136 msg2 = Message()
2137 msg2['Subject'] = 'subpart 2'
2138 r = MIMEMessage(msg1)
2139 self.assertRaises(errors.MultipartConversionError, r.attach, msg2)
2140
2141 def test_generate(self):
2142 # First craft the message to be encapsulated
2143 m = Message()
2144 m['Subject'] = 'An enclosed message'
2145 m.set_payload('Here is the body of the message.\n')
2146 r = MIMEMessage(m)
2147 r['Subject'] = 'The enclosing message'
2148 s = StringIO()
2149 g = Generator(s)
2150 g.flatten(r)
2151 self.assertEqual(s.getvalue(), """\
2152Content-Type: message/rfc822
2153MIME-Version: 1.0
2154Subject: The enclosing message
2155
2156Subject: An enclosed message
2157
2158Here is the body of the message.
2159""")
2160
2161 def test_parse_message_rfc822(self):
2162 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002163 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002164 msg = self._msgobj('msg_11.txt')
2165 eq(msg.get_content_type(), 'message/rfc822')
2166 payload = msg.get_payload()
2167 unless(isinstance(payload, list))
2168 eq(len(payload), 1)
2169 submsg = payload[0]
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002170 self.assertTrue(isinstance(submsg, Message))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002171 eq(submsg['subject'], 'An enclosed message')
2172 eq(submsg.get_payload(), 'Here is the body of the message.\n')
2173
2174 def test_dsn(self):
2175 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002176 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002177 # msg 16 is a Delivery Status Notification, see RFC 1894
2178 msg = self._msgobj('msg_16.txt')
2179 eq(msg.get_content_type(), 'multipart/report')
2180 unless(msg.is_multipart())
2181 eq(len(msg.get_payload()), 3)
2182 # Subpart 1 is a text/plain, human readable section
2183 subpart = msg.get_payload(0)
2184 eq(subpart.get_content_type(), 'text/plain')
2185 eq(subpart.get_payload(), """\
2186This report relates to a message you sent with the following header fields:
2187
2188 Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
2189 Date: Sun, 23 Sep 2001 20:10:55 -0700
2190 From: "Ian T. Henry" <henryi@oxy.edu>
2191 To: SoCal Raves <scr@socal-raves.org>
2192 Subject: [scr] yeah for Ians!!
2193
2194Your message cannot be delivered to the following recipients:
2195
2196 Recipient address: jangel1@cougar.noc.ucla.edu
2197 Reason: recipient reached disk quota
2198
2199""")
2200 # Subpart 2 contains the machine parsable DSN information. It
2201 # consists of two blocks of headers, represented by two nested Message
2202 # objects.
2203 subpart = msg.get_payload(1)
2204 eq(subpart.get_content_type(), 'message/delivery-status')
2205 eq(len(subpart.get_payload()), 2)
2206 # message/delivery-status should treat each block as a bunch of
2207 # headers, i.e. a bunch of Message objects.
2208 dsn1 = subpart.get_payload(0)
2209 unless(isinstance(dsn1, Message))
2210 eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
2211 eq(dsn1.get_param('dns', header='reporting-mta'), '')
2212 # Try a missing one <wink>
2213 eq(dsn1.get_param('nsd', header='reporting-mta'), None)
2214 dsn2 = subpart.get_payload(1)
2215 unless(isinstance(dsn2, Message))
2216 eq(dsn2['action'], 'failed')
2217 eq(dsn2.get_params(header='original-recipient'),
2218 [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
2219 eq(dsn2.get_param('rfc822', header='final-recipient'), '')
2220 # Subpart 3 is the original message
2221 subpart = msg.get_payload(2)
2222 eq(subpart.get_content_type(), 'message/rfc822')
2223 payload = subpart.get_payload()
2224 unless(isinstance(payload, list))
2225 eq(len(payload), 1)
2226 subsubpart = payload[0]
2227 unless(isinstance(subsubpart, Message))
2228 eq(subsubpart.get_content_type(), 'text/plain')
2229 eq(subsubpart['message-id'],
2230 '<002001c144a6$8752e060$56104586@oxy.edu>')
2231
2232 def test_epilogue(self):
2233 eq = self.ndiffAssertEqual
2234 with openfile('msg_21.txt') as fp:
2235 text = fp.read()
2236 msg = Message()
2237 msg['From'] = 'aperson@dom.ain'
2238 msg['To'] = 'bperson@dom.ain'
2239 msg['Subject'] = 'Test'
2240 msg.preamble = 'MIME message'
2241 msg.epilogue = 'End of MIME message\n'
2242 msg1 = MIMEText('One')
2243 msg2 = MIMEText('Two')
2244 msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
2245 msg.attach(msg1)
2246 msg.attach(msg2)
2247 sfp = StringIO()
2248 g = Generator(sfp)
2249 g.flatten(msg)
2250 eq(sfp.getvalue(), text)
2251
2252 def test_no_nl_preamble(self):
2253 eq = self.ndiffAssertEqual
2254 msg = Message()
2255 msg['From'] = 'aperson@dom.ain'
2256 msg['To'] = 'bperson@dom.ain'
2257 msg['Subject'] = 'Test'
2258 msg.preamble = 'MIME message'
2259 msg.epilogue = ''
2260 msg1 = MIMEText('One')
2261 msg2 = MIMEText('Two')
2262 msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
2263 msg.attach(msg1)
2264 msg.attach(msg2)
2265 eq(msg.as_string(), """\
2266From: aperson@dom.ain
2267To: bperson@dom.ain
2268Subject: Test
2269Content-Type: multipart/mixed; boundary="BOUNDARY"
2270
2271MIME message
2272--BOUNDARY
2273Content-Type: text/plain; charset="us-ascii"
2274MIME-Version: 1.0
2275Content-Transfer-Encoding: 7bit
2276
2277One
2278--BOUNDARY
2279Content-Type: text/plain; charset="us-ascii"
2280MIME-Version: 1.0
2281Content-Transfer-Encoding: 7bit
2282
2283Two
2284--BOUNDARY--
2285""")
2286
def test_default_type(self):
    # In msg_30.txt both top-level parts must report message/rfc822 as
    # default and content type, and the message each one wraps must
    # report text/plain.
    with openfile('msg_30.txt') as fp:
        digest = email.message_from_file(fp)
    containers = []
    for index in (0, 1):
        part = digest.get_payload(index)
        self.assertEqual(part.get_default_type(), 'message/rfc822')
        self.assertEqual(part.get_content_type(), 'message/rfc822')
        containers.append(part)
    for part in containers:
        inner = part.get_payload(0)
        self.assertEqual(inner.get_default_type(), 'text/plain')
        self.assertEqual(inner.get_content_type(), 'text/plain')
2303
def test_default_type_with_explicit_container_type(self):
    # Same checks as test_default_type, but against msg_28.txt.
    with openfile('msg_28.txt') as fp:
        digest = email.message_from_file(fp)
    containers = []
    for index in (0, 1):
        part = digest.get_payload(index)
        self.assertEqual(part.get_default_type(), 'message/rfc822')
        self.assertEqual(part.get_content_type(), 'message/rfc822')
        containers.append(part)
    for part in containers:
        inner = part.get_payload(0)
        self.assertEqual(inner.get_default_type(), 'text/plain')
        self.assertEqual(inner.get_content_type(), 'text/plain')
2320
2321 def test_default_type_non_parsed(self):
2322 eq = self.assertEqual
2323 neq = self.ndiffAssertEqual
2324 # Set up container
2325 container = MIMEMultipart('digest', 'BOUNDARY')
2326 container.epilogue = ''
2327 # Set up subparts
2328 subpart1a = MIMEText('message 1\n')
2329 subpart2a = MIMEText('message 2\n')
2330 subpart1 = MIMEMessage(subpart1a)
2331 subpart2 = MIMEMessage(subpart2a)
2332 container.attach(subpart1)
2333 container.attach(subpart2)
2334 eq(subpart1.get_content_type(), 'message/rfc822')
2335 eq(subpart1.get_default_type(), 'message/rfc822')
2336 eq(subpart2.get_content_type(), 'message/rfc822')
2337 eq(subpart2.get_default_type(), 'message/rfc822')
2338 neq(container.as_string(0), '''\
2339Content-Type: multipart/digest; boundary="BOUNDARY"
2340MIME-Version: 1.0
2341
2342--BOUNDARY
2343Content-Type: message/rfc822
2344MIME-Version: 1.0
2345
2346Content-Type: text/plain; charset="us-ascii"
2347MIME-Version: 1.0
2348Content-Transfer-Encoding: 7bit
2349
2350message 1
2351
2352--BOUNDARY
2353Content-Type: message/rfc822
2354MIME-Version: 1.0
2355
2356Content-Type: text/plain; charset="us-ascii"
2357MIME-Version: 1.0
2358Content-Transfer-Encoding: 7bit
2359
2360message 2
2361
2362--BOUNDARY--
2363''')
2364 del subpart1['content-type']
2365 del subpart1['mime-version']
2366 del subpart2['content-type']
2367 del subpart2['mime-version']
2368 eq(subpart1.get_content_type(), 'message/rfc822')
2369 eq(subpart1.get_default_type(), 'message/rfc822')
2370 eq(subpart2.get_content_type(), 'message/rfc822')
2371 eq(subpart2.get_default_type(), 'message/rfc822')
2372 neq(container.as_string(0), '''\
2373Content-Type: multipart/digest; boundary="BOUNDARY"
2374MIME-Version: 1.0
2375
2376--BOUNDARY
2377
2378Content-Type: text/plain; charset="us-ascii"
2379MIME-Version: 1.0
2380Content-Transfer-Encoding: 7bit
2381
2382message 1
2383
2384--BOUNDARY
2385
2386Content-Type: text/plain; charset="us-ascii"
2387MIME-Version: 1.0
2388Content-Transfer-Encoding: 7bit
2389
2390message 2
2391
2392--BOUNDARY--
2393''')
2394
2395 def test_mime_attachments_in_constructor(self):
2396 eq = self.assertEqual
2397 text1 = MIMEText('')
2398 text2 = MIMEText('')
2399 msg = MIMEMultipart(_subparts=(text1, text2))
2400 eq(len(msg.get_payload()), 2)
2401 eq(msg.get_payload(0), text1)
2402 eq(msg.get_payload(1), text2)
2403
Christian Heimes587c2bf2008-01-19 16:21:02 +00002404 def test_default_multipart_constructor(self):
2405 msg = MIMEMultipart()
2406 self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002407
Ezio Melottib3aedd42010-11-20 19:04:17 +00002408
# A general test of parser->model->generator idempotency.  In other words,
# read a message in, parse it into a message object tree, then, without
# touching the tree, regenerate the plain text.  The original text and the
# transformed text should be identical.  Note that we ignore the Unix-From
# since that may contain a changed date.
class TestIdempotent(TestEmailBase):
    # Each test parses a sample message file and, via _idempotent(), checks
    # that flattening the parsed object tree gives back the file's text.

    linesep = '\n'

    def _msgobj(self, filename):
        # Return the parsed message together with the raw text it was
        # parsed from, so callers can compare regenerated output to it.
        with openfile(filename) as fp:
            data = fp.read()
        msg = email.message_from_string(data)
        return msg, data

    def _idempotent(self, msg, text, unixfrom=False):
        # maxheaderlen=0: don't wrap/continue long headers, since the
        # output is compared verbatim against the original text.
        s = StringIO()
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg, unixfrom=unixfrom)
        self.ndiffAssertEqual(text, s.getvalue())

    def test_parse_text_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_01.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_params()[1], ('charset', 'us-ascii'))
        eq(msg.get_param('charset'), 'us-ascii')
        eq(msg.preamble, None)
        eq(msg.epilogue, None)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_03.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_params(), None)
        eq(msg.get_param('charset'), None)
        self._idempotent(msg, text)

    def test_simple_multipart(self):
        msg, text = self._msgobj('msg_04.txt')
        self._idempotent(msg, text)

    def test_MIME_digest(self):
        msg, text = self._msgobj('msg_02.txt')
        self._idempotent(msg, text)

    def test_long_header(self):
        msg, text = self._msgobj('msg_27.txt')
        self._idempotent(msg, text)

    def test_MIME_digest_with_part_headers(self):
        msg, text = self._msgobj('msg_28.txt')
        self._idempotent(msg, text)

    def test_mixed_with_image(self):
        msg, text = self._msgobj('msg_06.txt')
        self._idempotent(msg, text)

    def test_multipart_report(self):
        msg, text = self._msgobj('msg_05.txt')
        self._idempotent(msg, text)

    def test_dsn(self):
        msg, text = self._msgobj('msg_16.txt')
        self._idempotent(msg, text)

    def test_preamble_epilogue(self):
        msg, text = self._msgobj('msg_21.txt')
        self._idempotent(msg, text)

    def test_multipart_one_part(self):
        msg, text = self._msgobj('msg_23.txt')
        self._idempotent(msg, text)

    def test_multipart_no_parts(self):
        msg, text = self._msgobj('msg_24.txt')
        self._idempotent(msg, text)

    def test_no_start_boundary(self):
        msg, text = self._msgobj('msg_31.txt')
        self._idempotent(msg, text)

    def test_rfc2231_charset(self):
        msg, text = self._msgobj('msg_32.txt')
        self._idempotent(msg, text)

    def test_more_rfc2231_parameters(self):
        msg, text = self._msgobj('msg_33.txt')
        self._idempotent(msg, text)

    def test_text_plain_in_a_multipart_digest(self):
        msg, text = self._msgobj('msg_34.txt')
        self._idempotent(msg, text)

    def test_nested_multipart_mixeds(self):
        msg, text = self._msgobj('msg_12a.txt')
        self._idempotent(msg, text)

    def test_message_external_body_idempotent(self):
        msg, text = self._msgobj('msg_36.txt')
        self._idempotent(msg, text)

    def test_message_delivery_status(self):
        # This is the one case that also regenerates the Unix-From line.
        msg, text = self._msgobj('msg_43.txt')
        self._idempotent(msg, text, unixfrom=True)

    def test_message_signed_idempotent(self):
        msg, text = self._msgobj('msg_45.txt')
        self._idempotent(msg, text)

    def test_content_type(self):
        eq = self.assertEqual
        msg, _ = self._msgobj('msg_05.txt')
        eq(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(msg.get_params())
        eq(params['report-type'], 'delivery-status')
        eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep)
        eq(msg.epilogue, self.linesep)
        eq(len(msg.get_payload()), 3)
        # Make sure the subparts are what we expect
        msg1 = msg.get_payload(0)
        eq(msg1.get_content_type(), 'text/plain')
        eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg2 = msg.get_payload(1)
        eq(msg2.get_content_type(), 'text/plain')
        eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg3 = msg.get_payload(2)
        eq(msg3.get_content_type(), 'message/rfc822')
        self.assertIsInstance(msg3, Message)
        payload = msg3.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg4 = payload[0]
        self.assertIsInstance(msg4, Message)
        eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep)

    def test_parser(self):
        eq = self.assertEqual
        msg, _ = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        eq(msg.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and that
        # that submessage has a type of text/plain
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg1 = payload[0]
        self.assertIsInstance(msg1, Message)
        eq(msg1.get_content_type(), 'text/plain')
        self.assertIsInstance(msg1.get_payload(), str)
        eq(msg1.get_payload(), self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002571
2572
Ezio Melottib3aedd42010-11-20 19:04:17 +00002573
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002574# Test various other bits of the package's functionality
2575class TestMiscellaneous(TestEmailBase):
2576 def test_message_from_string(self):
2577 with openfile('msg_01.txt') as fp:
2578 text = fp.read()
2579 msg = email.message_from_string(text)
2580 s = StringIO()
2581 # Don't wrap/continue long headers since we're trying to test
2582 # idempotency.
2583 g = Generator(s, maxheaderlen=0)
2584 g.flatten(msg)
2585 self.assertEqual(text, s.getvalue())
2586
2587 def test_message_from_file(self):
2588 with openfile('msg_01.txt') as fp:
2589 text = fp.read()
2590 fp.seek(0)
2591 msg = email.message_from_file(fp)
2592 s = StringIO()
2593 # Don't wrap/continue long headers since we're trying to test
2594 # idempotency.
2595 g = Generator(s, maxheaderlen=0)
2596 g.flatten(msg)
2597 self.assertEqual(text, s.getvalue())
2598
2599 def test_message_from_string_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002600 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002601 with openfile('msg_01.txt') as fp:
2602 text = fp.read()
2603
2604 # Create a subclass
2605 class MyMessage(Message):
2606 pass
2607
2608 msg = email.message_from_string(text, MyMessage)
2609 unless(isinstance(msg, MyMessage))
2610 # Try something more complicated
2611 with openfile('msg_02.txt') as fp:
2612 text = fp.read()
2613 msg = email.message_from_string(text, MyMessage)
2614 for subpart in msg.walk():
2615 unless(isinstance(subpart, MyMessage))
2616
2617 def test_message_from_file_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002618 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002619 # Create a subclass
2620 class MyMessage(Message):
2621 pass
2622
2623 with openfile('msg_01.txt') as fp:
2624 msg = email.message_from_file(fp, MyMessage)
2625 unless(isinstance(msg, MyMessage))
2626 # Try something more complicated
2627 with openfile('msg_02.txt') as fp:
2628 msg = email.message_from_file(fp, MyMessage)
2629 for subpart in msg.walk():
2630 unless(isinstance(subpart, MyMessage))
2631
R David Murrayc27e5222012-05-25 15:01:48 -04002632 def test_custom_message_does_not_require_arguments(self):
2633 class MyMessage(Message):
2634 def __init__(self):
2635 super().__init__()
2636 msg = self._str_msg("Subject: test\n\ntest", MyMessage)
2637 self.assertTrue(isinstance(msg, MyMessage))
2638
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002639 def test__all__(self):
2640 module = __import__('email')
R David Murray1b6c7242012-03-16 22:43:05 -04002641 self.assertEqual(sorted(module.__all__), [
2642 'base64mime', 'charset', 'encoders', 'errors', 'feedparser',
2643 'generator', 'header', 'iterators', 'message',
2644 'message_from_binary_file', 'message_from_bytes',
2645 'message_from_file', 'message_from_string', 'mime', 'parser',
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002646 'quoprimime', 'utils',
2647 ])
2648
2649 def test_formatdate(self):
2650 now = time.time()
2651 self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
2652 time.gmtime(now)[:6])
2653
2654 def test_formatdate_localtime(self):
2655 now = time.time()
2656 self.assertEqual(
2657 utils.parsedate(utils.formatdate(now, localtime=True))[:6],
2658 time.localtime(now)[:6])
2659
2660 def test_formatdate_usegmt(self):
2661 now = time.time()
2662 self.assertEqual(
2663 utils.formatdate(now, localtime=False),
2664 time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
2665 self.assertEqual(
2666 utils.formatdate(now, localtime=False, usegmt=True),
2667 time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
2668
2669 def test_parsedate_none(self):
2670 self.assertEqual(utils.parsedate(''), None)
2671
2672 def test_parsedate_compact(self):
2673 # The FWS after the comma is optional
2674 self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
2675 utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
2676
2677 def test_parsedate_no_dayofweek(self):
2678 eq = self.assertEqual
2679 eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
2680 (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
2681
2682 def test_parsedate_compact_no_dayofweek(self):
2683 eq = self.assertEqual
2684 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
2685 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2686
R. David Murray4a62e892010-12-23 20:35:46 +00002687 def test_parsedate_no_space_before_positive_offset(self):
2688 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
2689 (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
2690
2691 def test_parsedate_no_space_before_negative_offset(self):
2692 # Issue 1155362: we already handled '+' for this case.
2693 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
2694 (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))
2695
2696
R David Murrayaccd1c02011-03-13 20:06:23 -04002697 def test_parsedate_accepts_time_with_dots(self):
2698 eq = self.assertEqual
2699 eq(utils.parsedate_tz('5 Feb 2003 13.47.26 -0800'),
2700 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2701 eq(utils.parsedate_tz('5 Feb 2003 13.47 -0800'),
2702 (2003, 2, 5, 13, 47, 0, 0, 1, -1, -28800))
2703
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002704 def test_parsedate_acceptable_to_time_functions(self):
2705 eq = self.assertEqual
2706 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
2707 t = int(time.mktime(timetup))
2708 eq(time.localtime(t)[:6], timetup[:6])
2709 eq(int(time.strftime('%Y', timetup)), 2003)
2710 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
2711 t = int(time.mktime(timetup[:9]))
2712 eq(time.localtime(t)[:6], timetup[:6])
2713 eq(int(time.strftime('%Y', timetup[:9])), 2003)
2714
R. David Murray219d1c82010-08-25 00:45:55 +00002715 def test_parsedate_y2k(self):
2716 """Test for parsing a date with a two-digit year.
2717
2718 Parsing a date with a two-digit year should return the correct
2719 four-digit year. RFC822 allows two-digit years, but RFC2822 (which
2720 obsoletes RFC822) requires four-digit years.
2721
2722 """
2723 self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
2724 utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
2725 self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
2726 utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
2727
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002728 def test_parseaddr_empty(self):
2729 self.assertEqual(utils.parseaddr('<>'), ('', ''))
2730 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
2731
2732 def test_noquote_dump(self):
2733 self.assertEqual(
2734 utils.formataddr(('A Silly Person', 'person@dom.ain')),
2735 'A Silly Person <person@dom.ain>')
2736
2737 def test_escape_dump(self):
2738 self.assertEqual(
2739 utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
R David Murrayb53319f2012-03-14 15:31:47 -04002740 r'"A (Very) Silly Person" <person@dom.ain>')
2741 self.assertEqual(
2742 utils.parseaddr(r'"A \(Very\) Silly Person" <person@dom.ain>'),
2743 ('A (Very) Silly Person', 'person@dom.ain'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002744 a = r'A \(Special\) Person'
2745 b = 'person@dom.ain'
2746 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2747
2748 def test_escape_backslashes(self):
2749 self.assertEqual(
2750 utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
2751 r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
2752 a = r'Arthur \Backslash\ Foobar'
2753 b = 'person@dom.ain'
2754 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2755
R David Murray8debacb2011-04-06 09:35:57 -04002756 def test_quotes_unicode_names(self):
2757 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2758 name = "H\u00e4ns W\u00fcrst"
2759 addr = 'person@dom.ain'
2760 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
2761 latin1_quopri = "=?iso-8859-1?q?H=E4ns_W=FCrst?= <person@dom.ain>"
2762 self.assertEqual(utils.formataddr((name, addr)), utf8_base64)
2763 self.assertEqual(utils.formataddr((name, addr), 'iso-8859-1'),
2764 latin1_quopri)
2765
2766 def test_accepts_any_charset_like_object(self):
2767 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2768 name = "H\u00e4ns W\u00fcrst"
2769 addr = 'person@dom.ain'
2770 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
2771 foobar = "FOOBAR"
2772 class CharsetMock:
2773 def header_encode(self, string):
2774 return foobar
2775 mock = CharsetMock()
2776 mock_expected = "%s <%s>" % (foobar, addr)
2777 self.assertEqual(utils.formataddr((name, addr), mock), mock_expected)
2778 self.assertEqual(utils.formataddr((name, addr), Charset('utf-8')),
2779 utf8_base64)
2780
2781 def test_invalid_charset_like_object_raises_error(self):
2782 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2783 name = "H\u00e4ns W\u00fcrst"
2784 addr = 'person@dom.ain'
2785 # A object without a header_encode method:
2786 bad_charset = object()
2787 self.assertRaises(AttributeError, utils.formataddr, (name, addr),
2788 bad_charset)
2789
2790 def test_unicode_address_raises_error(self):
2791 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2792 addr = 'pers\u00f6n@dom.in'
2793 self.assertRaises(UnicodeError, utils.formataddr, (None, addr))
2794 self.assertRaises(UnicodeError, utils.formataddr, ("Name", addr))
2795
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002796 def test_name_with_dot(self):
2797 x = 'John X. Doe <jxd@example.com>'
2798 y = '"John X. Doe" <jxd@example.com>'
2799 a, b = ('John X. Doe', 'jxd@example.com')
2800 self.assertEqual(utils.parseaddr(x), (a, b))
2801 self.assertEqual(utils.parseaddr(y), (a, b))
2802 # formataddr() quotes the name if there's a dot in it
2803 self.assertEqual(utils.formataddr((a, b)), y)
2804
R. David Murray5397e862010-10-02 15:58:26 +00002805 def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
2806 # issue 10005. Note that in the third test the second pair of
2807 # backslashes is not actually a quoted pair because it is not inside a
2808 # comment or quoted string: the address being parsed has a quoted
2809 # string containing a quoted backslash, followed by 'example' and two
2810 # backslashes, followed by another quoted string containing a space and
2811 # the word 'example'. parseaddr copies those two backslashes
2812 # literally. Per rfc5322 this is not technically correct since a \ may
2813 # not appear in an address outside of a quoted string. It is probably
2814 # a sensible Postel interpretation, though.
2815 eq = self.assertEqual
2816 eq(utils.parseaddr('""example" example"@example.com'),
2817 ('', '""example" example"@example.com'))
2818 eq(utils.parseaddr('"\\"example\\" example"@example.com'),
2819 ('', '"\\"example\\" example"@example.com'))
2820 eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
2821 ('', '"\\\\"example\\\\" example"@example.com'))
2822
R. David Murray63563cd2010-12-18 18:25:38 +00002823 def test_parseaddr_preserves_spaces_in_local_part(self):
2824 # issue 9286. A normal RFC5322 local part should not contain any
2825 # folding white space, but legacy local parts can (they are a sequence
2826 # of atoms, not dotatoms). On the other hand we strip whitespace from
2827 # before the @ and around dots, on the assumption that the whitespace
2828 # around the punctuation is a mistake in what would otherwise be
2829 # an RFC5322 local part. Leading whitespace is, usual, stripped as well.
2830 self.assertEqual(('', "merwok wok@xample.com"),
2831 utils.parseaddr("merwok wok@xample.com"))
2832 self.assertEqual(('', "merwok wok@xample.com"),
2833 utils.parseaddr("merwok wok@xample.com"))
2834 self.assertEqual(('', "merwok wok@xample.com"),
2835 utils.parseaddr(" merwok wok @xample.com"))
2836 self.assertEqual(('', 'merwok"wok" wok@xample.com'),
2837 utils.parseaddr('merwok"wok" wok@xample.com'))
2838 self.assertEqual(('', 'merwok.wok.wok@xample.com'),
2839 utils.parseaddr('merwok. wok . wok@xample.com'))
2840
R David Murrayb53319f2012-03-14 15:31:47 -04002841 def test_formataddr_does_not_quote_parens_in_quoted_string(self):
2842 addr = ("'foo@example.com' (foo@example.com)",
2843 'foo@example.com')
2844 addrstr = ('"\'foo@example.com\' '
2845 '(foo@example.com)" <foo@example.com>')
2846 self.assertEqual(utils.parseaddr(addrstr), addr)
2847 self.assertEqual(utils.formataddr(addr), addrstr)
2848
2849
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002850 def test_multiline_from_comment(self):
2851 x = """\
2852Foo
2853\tBar <foo@example.com>"""
2854 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
2855
2856 def test_quote_dump(self):
2857 self.assertEqual(
2858 utils.formataddr(('A Silly; Person', 'person@dom.ain')),
2859 r'"A Silly; Person" <person@dom.ain>')
2860
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002861 def test_charset_richcomparisons(self):
2862 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002863 ne = self.assertNotEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002864 cset1 = Charset()
2865 cset2 = Charset()
2866 eq(cset1, 'us-ascii')
2867 eq(cset1, 'US-ASCII')
2868 eq(cset1, 'Us-AsCiI')
2869 eq('us-ascii', cset1)
2870 eq('US-ASCII', cset1)
2871 eq('Us-AsCiI', cset1)
2872 ne(cset1, 'usascii')
2873 ne(cset1, 'USASCII')
2874 ne(cset1, 'UsAsCiI')
2875 ne('usascii', cset1)
2876 ne('USASCII', cset1)
2877 ne('UsAsCiI', cset1)
2878 eq(cset1, cset2)
2879 eq(cset2, cset1)
2880
2881 def test_getaddresses(self):
2882 eq = self.assertEqual
2883 eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
2884 'Bud Person <bperson@dom.ain>']),
2885 [('Al Person', 'aperson@dom.ain'),
2886 ('Bud Person', 'bperson@dom.ain')])
2887
2888 def test_getaddresses_nasty(self):
2889 eq = self.assertEqual
2890 eq(utils.getaddresses(['foo: ;']), [('', '')])
2891 eq(utils.getaddresses(
2892 ['[]*-- =~$']),
2893 [('', ''), ('', ''), ('', '*--')])
2894 eq(utils.getaddresses(
2895 ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
2896 [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
2897
2898 def test_getaddresses_embedded_comment(self):
2899 """Test proper handling of a nested comment"""
2900 eq = self.assertEqual
2901 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
2902 eq(addrs[0][1], 'foo@bar.com')
2903
2904 def test_utils_quote_unquote(self):
2905 eq = self.assertEqual
2906 msg = Message()
2907 msg.add_header('content-disposition', 'attachment',
2908 filename='foo\\wacky"name')
2909 eq(msg.get_filename(), 'foo\\wacky"name')
2910
2911 def test_get_body_encoding_with_bogus_charset(self):
2912 charset = Charset('not a charset')
2913 self.assertEqual(charset.get_body_encoding(), 'base64')
2914
2915 def test_get_body_encoding_with_uppercase_charset(self):
2916 eq = self.assertEqual
2917 msg = Message()
2918 msg['Content-Type'] = 'text/plain; charset=UTF-8'
2919 eq(msg['content-type'], 'text/plain; charset=UTF-8')
2920 charsets = msg.get_charsets()
2921 eq(len(charsets), 1)
2922 eq(charsets[0], 'utf-8')
2923 charset = Charset(charsets[0])
2924 eq(charset.get_body_encoding(), 'base64')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00002925 msg.set_payload(b'hello world', charset=charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002926 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
2927 eq(msg.get_payload(decode=True), b'hello world')
2928 eq(msg['content-transfer-encoding'], 'base64')
2929 # Try another one
2930 msg = Message()
2931 msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
2932 charsets = msg.get_charsets()
2933 eq(len(charsets), 1)
2934 eq(charsets[0], 'us-ascii')
2935 charset = Charset(charsets[0])
2936 eq(charset.get_body_encoding(), encoders.encode_7or8bit)
2937 msg.set_payload('hello world', charset=charset)
2938 eq(msg.get_payload(), 'hello world')
2939 eq(msg['content-transfer-encoding'], '7bit')
2940
2941 def test_charsets_case_insensitive(self):
2942 lc = Charset('us-ascii')
2943 uc = Charset('US-ASCII')
2944 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
2945
2946 def test_partial_falls_inside_message_delivery_status(self):
2947 eq = self.ndiffAssertEqual
2948 # The Parser interface provides chunks of data to FeedParser in 8192
2949 # byte gulps. SF bug #1076485 found one of those chunks inside
2950 # message/delivery-status header block, which triggered an
2951 # unreadline() of NeedMoreData.
2952 msg = self._msgobj('msg_43.txt')
2953 sfp = StringIO()
2954 iterators._structure(msg, sfp)
2955 eq(sfp.getvalue(), """\
2956multipart/report
2957 text/plain
2958 message/delivery-status
2959 text/plain
2960 text/plain
2961 text/plain
2962 text/plain
2963 text/plain
2964 text/plain
2965 text/plain
2966 text/plain
2967 text/plain
2968 text/plain
2969 text/plain
2970 text/plain
2971 text/plain
2972 text/plain
2973 text/plain
2974 text/plain
2975 text/plain
2976 text/plain
2977 text/plain
2978 text/plain
2979 text/plain
2980 text/plain
2981 text/plain
2982 text/plain
2983 text/plain
2984 text/plain
2985 text/rfc822-headers
2986""")
2987
R. David Murraya0b44b52010-12-02 21:47:19 +00002988 def test_make_msgid_domain(self):
2989 self.assertEqual(
2990 email.utils.make_msgid(domain='testdomain-string')[-19:],
2991 '@testdomain-string>')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002992
Ezio Melottib3aedd42010-11-20 19:04:17 +00002993
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002994# Test the iterator/generators
class TestIterators(TestEmailBase):
    # Checks for the email.iterators helpers, plus a regression test for
    # the feed parser's line buffering.

    def test_body_line_iterator(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # First a simple non-multipart message
        msg = self._msgobj('msg_01.txt')
        it = iterators.body_line_iterator(msg)
        lines = list(it)
        eq(len(lines), 6)
        neq(EMPTYSTRING.join(lines), msg.get_payload())
        # Now a more complicated multipart
        msg = self._msgobj('msg_02.txt')
        it = iterators.body_line_iterator(msg)
        lines = list(it)
        eq(len(lines), 43)
        with openfile('msg_19.txt') as fp:
            neq(EMPTYSTRING.join(lines), fp.read())

    def test_typed_subpart_iterator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_04.txt')
        it = iterators.typed_subpart_iterator(msg, 'text')
        lines = []
        subparts = 0
        for subpart in it:
            subparts += 1
            lines.append(subpart.get_payload())
        eq(subparts, 2)
        eq(EMPTYSTRING.join(lines), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_03.txt')
        it = iterators.typed_subpart_iterator(msg, 'text', 'plain')
        lines = []
        subparts = 0
        for subpart in it:
            subparts += 1
            lines.append(subpart.get_payload())
        eq(subparts, 1)
        eq(EMPTYSTRING.join(lines), """\

Hi,

Do you like this message?

-Me
""")

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
           line endings.  A CR ending one push() followed by a LF starting
           the next push() added an empty line.
        '''
        # Each entry is (text to push, number of complete lines expected to
        # become readable as a result of that push).
        imt = [
            ("a\r \n",  2),
            ("b",       0),
            ("c\n",     1),
            ("",        0),
            ("d\r\n",   1),
            ("e\r",     0),
            ("\nf",     1),
            ("\r\n",    1),
          ]
        from email.feedparser import BufferedSubFile, NeedMoreData
        bsf = BufferedSubFile()
        om = []
        nt = 0
        for il, n in imt:
            bsf.push(il)
            nt += n
            n1 = 0
            while True:
                ol = bsf.readline()
                # NeedMoreData is a sentinel, so compare by identity.
                if ol is NeedMoreData:
                    break
                om.append(ol)
                n1 += 1
            self.assertEqual(n, n1)
        self.assertEqual(len(om), nt)
        # Nothing may be lost or invented: the lines read back must
        # concatenate to exactly what was pushed.
        self.assertEqual(''.join(il for il, n in imt), ''.join(om))
3081
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003082
Ezio Melottib3aedd42010-11-20 19:04:17 +00003083
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003084class TestParsers(TestEmailBase):
R David Murrayb35c8502011-04-13 16:46:05 -04003085
def test_header_parser(self):
    eq = self.assertEqual
    # Parse only the headers of a complex multipart MIME document
    with openfile('msg_02.txt') as fp:
        msg = HeaderParser().parse(fp)
    eq(msg['from'], 'ppp-request@zzz.org')
    eq(msg['to'], 'ppp@zzz.org')
    eq(msg.get_content_type(), 'multipart/mixed')
    # Only the headers were parsed, so the body is kept as a single
    # string rather than being split into subparts.
    self.assertFalse(msg.is_multipart())
    self.assertIsInstance(msg.get_payload(), str)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003096
def test_bytes_header_parser(self):
    """BytesHeaderParser must read the headers and leave the body unparsed."""
    eq = self.assertEqual
    # Parse only the headers of a complex multipart MIME document
    with openfile('msg_02.txt', 'rb') as fp:
        msg = email.parser.BytesHeaderParser().parse(fp)
    eq(msg['from'], 'ppp-request@zzz.org')
    eq(msg['to'], 'ppp@zzz.org')
    eq(msg.get_content_type(), 'multipart/mixed')
    self.assertFalse(msg.is_multipart())
    # The payload is text by default and bytes only when decode=True.
    # assertIsInstance names the offending type when it fails.
    self.assertIsInstance(msg.get_payload(), str)
    self.assertIsInstance(msg.get_payload(decode=True), bytes)
3108
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003109 def test_whitespace_continuation(self):
3110 eq = self.assertEqual
3111 # This message contains a line after the Subject: header that has only
3112 # whitespace, but it is not empty!
3113 msg = email.message_from_string("""\
3114From: aperson@dom.ain
3115To: bperson@dom.ain
3116Subject: the next line has a space on it
3117\x20
3118Date: Mon, 8 Apr 2002 15:09:19 -0400
3119Message-ID: spam
3120
3121Here's the message body
3122""")
3123 eq(msg['subject'], 'the next line has a space on it\n ')
3124 eq(msg['message-id'], 'spam')
3125 eq(msg.get_payload(), "Here's the message body\n")
3126
3127 def test_whitespace_continuation_last_header(self):
3128 eq = self.assertEqual
3129 # Like the previous test, but the subject line is the last
3130 # header.
3131 msg = email.message_from_string("""\
3132From: aperson@dom.ain
3133To: bperson@dom.ain
3134Date: Mon, 8 Apr 2002 15:09:19 -0400
3135Message-ID: spam
3136Subject: the next line has a space on it
3137\x20
3138
3139Here's the message body
3140""")
3141 eq(msg['subject'], 'the next line has a space on it\n ')
3142 eq(msg['message-id'], 'spam')
3143 eq(msg.get_payload(), "Here's the message body\n")
3144
def test_crlf_separation(self):
    """msg_26.txt must split into two parts with CRLFs kept in the text."""
    with openfile('msg_26.txt', newline='\n') as fp:
        message = Parser().parse(fp)
    self.assertEqual(len(message.get_payload()), 2)
    text_part = message.get_payload(0)
    self.assertEqual(text_part.get_content_type(), 'text/plain')
    self.assertEqual(text_part.get_payload(),
                     'Simple email with attachment.\r\n\r\n')
    attachment = message.get_payload(1)
    self.assertEqual(attachment.get_content_type(), 'application/riscos')
3155
def test_crlf_flatten(self):
    """Flattening msg_26.txt with linesep='\\r\\n' must reproduce the file."""
    # Using newline='\n' preserves the crlfs in this input file.
    with openfile('msg_26.txt', newline='\n') as fp:
        original = fp.read()
    parsed = email.message_from_string(original)
    sink = StringIO()
    Generator(sink).flatten(parsed, linesep='\r\n')
    self.assertEqual(sink.getvalue(), original)
3165
3166 maxDiff = None
3167
def test_multipart_digest_with_extra_mime_headers(self):
    """msg_28.txt must parse into two message/rfc822 parts, each text/plain."""
    eq = self.assertEqual
    neq = self.ndiffAssertEqual
    with openfile('msg_28.txt') as fp:
        digest = email.message_from_file(fp)
    # Structure is:
    # multipart/digest
    #   message/rfc822
    #     text/plain
    #   message/rfc822
    #     text/plain
    eq(digest.is_multipart(), 1)
    eq(len(digest.get_payload()), 2)
    # Both enclosed messages have the same shape; only the body text differs.
    for index, body in enumerate(('message 1\n', 'message 2\n')):
        wrapper = digest.get_payload(index)
        eq(wrapper.get_content_type(), 'message/rfc822')
        eq(wrapper.is_multipart(), 1)
        eq(len(wrapper.get_payload()), 1)
        inner = wrapper.get_payload(0)
        eq(inner.is_multipart(), 0)
        eq(inner.get_content_type(), 'text/plain')
        neq(inner.get_payload(), body)
3198
3199 def test_three_lines(self):
3200 # A bug report by Andrew McNamara
3201 lines = ['From: Andrew Person <aperson@dom.ain',
3202 'Subject: Test',
3203 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
3204 msg = email.message_from_string(NL.join(lines))
3205 self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')
3206
3207 def test_strip_line_feed_and_carriage_return_in_headers(self):
3208 eq = self.assertEqual
3209 # For [ 1002475 ] email message parser doesn't handle \r\n correctly
3210 value1 = 'text'
3211 value2 = 'more text'
3212 m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
3213 value1, value2)
3214 msg = email.message_from_string(m)
3215 eq(msg.get('Header'), value1)
3216 eq(msg.get('Next-Header'), value2)
3217
3218 def test_rfc2822_header_syntax(self):
3219 eq = self.assertEqual
3220 m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
3221 msg = email.message_from_string(m)
3222 eq(len(msg), 3)
3223 eq(sorted(field for field in msg), ['!"#QUX;~', '>From', 'From'])
3224 eq(msg.get_payload(), 'body')
3225
3226 def test_rfc2822_space_not_allowed_in_header(self):
3227 eq = self.assertEqual
3228 m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
3229 msg = email.message_from_string(m)
3230 eq(len(msg.keys()), 0)
3231
3232 def test_rfc2822_one_character_header(self):
3233 eq = self.assertEqual
3234 m = 'A: first header\nB: second header\nCC: third header\n\nbody'
3235 msg = email.message_from_string(m)
3236 headers = msg.keys()
3237 headers.sort()
3238 eq(headers, ['A', 'B', 'CC'])
3239 eq(msg.get_payload(), 'body')
3240
R. David Murray45e0e142010-06-16 02:19:40 +00003241 def test_CRLFLF_at_end_of_part(self):
3242 # issue 5610: feedparser should not eat two chars from body part ending
3243 # with "\r\n\n".
3244 m = (
3245 "From: foo@bar.com\n"
3246 "To: baz\n"
3247 "Mime-Version: 1.0\n"
3248 "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
3249 "\n"
3250 "--BOUNDARY\n"
3251 "Content-Type: text/plain\n"
3252 "\n"
3253 "body ending with CRLF newline\r\n"
3254 "\n"
3255 "--BOUNDARY--\n"
3256 )
3257 msg = email.message_from_string(m)
3258 self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003259
Ezio Melottib3aedd42010-11-20 19:04:17 +00003260
R. David Murray96fd54e2010-10-08 15:55:28 +00003261class Test8BitBytesHandling(unittest.TestCase):
3262 # In Python3 all input is string, but that doesn't work if the actual input
3263 # uses an 8bit transfer encoding. To hack around that, in email 5.1 we
3264 # decode byte streams using the surrogateescape error handler, and
3265 # reconvert to binary at appropriate places if we detect surrogates. This
3266 # doesn't allow us to transform headers with 8bit bytes (they get munged),
3267 # but it does allow us to parse and preserve them, and to decode body
3268 # parts that use an 8bit CTE.
3269
3270 bodytest_msg = textwrap.dedent("""\
3271 From: foo@bar.com
3272 To: baz
3273 Mime-Version: 1.0
3274 Content-Type: text/plain; charset={charset}
3275 Content-Transfer-Encoding: {cte}
3276
3277 {bodyline}
3278 """)
3279
3280 def test_known_8bit_CTE(self):
3281 m = self.bodytest_msg.format(charset='utf-8',
3282 cte='8bit',
3283 bodyline='pöstal').encode('utf-8')
3284 msg = email.message_from_bytes(m)
3285 self.assertEqual(msg.get_payload(), "pöstal\n")
3286 self.assertEqual(msg.get_payload(decode=True),
3287 "pöstal\n".encode('utf-8'))
3288
3289 def test_unknown_8bit_CTE(self):
3290 m = self.bodytest_msg.format(charset='notavalidcharset',
3291 cte='8bit',
3292 bodyline='pöstal').encode('utf-8')
3293 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00003294 self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n")
R. David Murray96fd54e2010-10-08 15:55:28 +00003295 self.assertEqual(msg.get_payload(decode=True),
3296 "pöstal\n".encode('utf-8'))
3297
3298 def test_8bit_in_quopri_body(self):
3299 # This is non-RFC compliant data...without 'decode' the library code
3300 # decodes the body using the charset from the headers, and because the
3301 # source byte really is utf-8 this works. This is likely to fail
3302 # against real dirty data (ie: produce mojibake), but the data is
3303 # invalid anyway so it is as good a guess as any. But this means that
3304 # this test just confirms the current behavior; that behavior is not
3305 # necessarily the best possible behavior. With 'decode' it is
3306 # returning the raw bytes, so that test should be of correct behavior,
3307 # or at least produce the same result that email4 did.
3308 m = self.bodytest_msg.format(charset='utf-8',
3309 cte='quoted-printable',
3310 bodyline='p=C3=B6stál').encode('utf-8')
3311 msg = email.message_from_bytes(m)
3312 self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
3313 self.assertEqual(msg.get_payload(decode=True),
3314 'pöstál\n'.encode('utf-8'))
3315
3316 def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
3317 # This is similar to the previous test, but proves that if the 8bit
3318 # byte is undecodeable in the specified charset, it gets replaced
3319 # by the unicode 'unknown' character. Again, this may or may not
3320 # be the ideal behavior. Note that if decode=False none of the
3321 # decoders will get involved, so this is the only test we need
3322 # for this behavior.
3323 m = self.bodytest_msg.format(charset='ascii',
3324 cte='quoted-printable',
3325 bodyline='p=C3=B6stál').encode('utf-8')
3326 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00003327 self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
R. David Murray96fd54e2010-10-08 15:55:28 +00003328 self.assertEqual(msg.get_payload(decode=True),
3329 'pöstál\n'.encode('utf-8'))
3330
R David Murray80e0aee2012-05-27 21:23:34 -04003331 # test_defect_handling:test_invalid_chars_in_base64_payload
R. David Murray96fd54e2010-10-08 15:55:28 +00003332 def test_8bit_in_base64_body(self):
R David Murray80e0aee2012-05-27 21:23:34 -04003333 # If we get 8bit bytes in a base64 body, we can just ignore them
3334 # as being outside the base64 alphabet and decode anyway. But
3335 # we register a defect.
R. David Murray96fd54e2010-10-08 15:55:28 +00003336 m = self.bodytest_msg.format(charset='utf-8',
3337 cte='base64',
3338 bodyline='cMO2c3RhbAá=').encode('utf-8')
3339 msg = email.message_from_bytes(m)
3340 self.assertEqual(msg.get_payload(decode=True),
R David Murray80e0aee2012-05-27 21:23:34 -04003341 'pöstal'.encode('utf-8'))
3342 self.assertIsInstance(msg.defects[0],
3343 errors.InvalidBase64CharactersDefect)
R. David Murray96fd54e2010-10-08 15:55:28 +00003344
3345 def test_8bit_in_uuencode_body(self):
3346 # Sticking an 8bit byte in a uuencode block makes it undecodable by
3347 # normal means, so the block is returned undecoded, but as bytes.
3348 m = self.bodytest_msg.format(charset='utf-8',
3349 cte='uuencode',
3350 bodyline='<,.V<W1A; á ').encode('utf-8')
3351 msg = email.message_from_bytes(m)
3352 self.assertEqual(msg.get_payload(decode=True),
3353 '<,.V<W1A; á \n'.encode('utf-8'))
3354
3355
R. David Murray92532142011-01-07 23:25:30 +00003356 headertest_headers = (
3357 ('From: foo@bar.com', ('From', 'foo@bar.com')),
3358 ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
3359 ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
3360 '\tJean de Baddie',
3361 ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
3362 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
3363 ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
3364 ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
3365 )
3366 headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) +
3367 '\nYes, they are flying.\n').encode('utf-8')
R. David Murray96fd54e2010-10-08 15:55:28 +00003368
3369 def test_get_8bit_header(self):
3370 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003371 self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz')
3372 self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz')
R. David Murray96fd54e2010-10-08 15:55:28 +00003373
3374 def test_print_8bit_headers(self):
3375 msg = email.message_from_bytes(self.headertest_msg)
3376 self.assertEqual(str(msg),
R. David Murray92532142011-01-07 23:25:30 +00003377 textwrap.dedent("""\
3378 From: {}
3379 To: {}
3380 Subject: {}
3381 From: {}
3382
3383 Yes, they are flying.
3384 """).format(*[expected[1] for (_, expected) in
3385 self.headertest_headers]))
R. David Murray96fd54e2010-10-08 15:55:28 +00003386
3387 def test_values_with_8bit_headers(self):
3388 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003389 self.assertListEqual([str(x) for x in msg.values()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003390 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003391 'b\uFFFD\uFFFDz',
3392 'Maintenant je vous pr\uFFFD\uFFFDsente mon '
3393 'coll\uFFFD\uFFFDgue, le pouf '
3394 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
R. David Murray96fd54e2010-10-08 15:55:28 +00003395 '\tJean de Baddie',
R. David Murray92532142011-01-07 23:25:30 +00003396 "g\uFFFD\uFFFDst"])
R. David Murray96fd54e2010-10-08 15:55:28 +00003397
3398 def test_items_with_8bit_headers(self):
3399 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003400 self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003401 [('From', 'foo@bar.com'),
R. David Murray92532142011-01-07 23:25:30 +00003402 ('To', 'b\uFFFD\uFFFDz'),
3403 ('Subject', 'Maintenant je vous '
3404 'pr\uFFFD\uFFFDsente '
3405 'mon coll\uFFFD\uFFFDgue, le pouf '
3406 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
3407 '\tJean de Baddie'),
3408 ('From', 'g\uFFFD\uFFFDst')])
R. David Murray96fd54e2010-10-08 15:55:28 +00003409
3410 def test_get_all_with_8bit_headers(self):
3411 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003412 self.assertListEqual([str(x) for x in msg.get_all('from')],
R. David Murray96fd54e2010-10-08 15:55:28 +00003413 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003414 'g\uFFFD\uFFFDst'])
R. David Murray96fd54e2010-10-08 15:55:28 +00003415
R David Murraya2150232011-03-16 21:11:23 -04003416 def test_get_content_type_with_8bit(self):
3417 msg = email.message_from_bytes(textwrap.dedent("""\
3418 Content-Type: text/pl\xA7in; charset=utf-8
3419 """).encode('latin-1'))
3420 self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin")
3421 self.assertEqual(msg.get_content_maintype(), "text")
3422 self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin")
3423
3424 def test_get_params_with_8bit(self):
3425 msg = email.message_from_bytes(
3426 'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1'))
3427 self.assertEqual(msg.get_params(header='x-header'),
3428 [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])
3429 self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne')
3430 # XXX: someday you might be able to get 'b\xa7r', for now you can't.
3431 self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None)
3432
3433 def test_get_rfc2231_params_with_8bit(self):
3434 msg = email.message_from_bytes(textwrap.dedent("""\
3435 Content-Type: text/plain; charset=us-ascii;
3436 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3437 ).encode('latin-1'))
3438 self.assertEqual(msg.get_param('title'),
3439 ('us-ascii', 'en', 'This is not f\uFFFDn'))
3440
3441 def test_set_rfc2231_params_with_8bit(self):
3442 msg = email.message_from_bytes(textwrap.dedent("""\
3443 Content-Type: text/plain; charset=us-ascii;
3444 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3445 ).encode('latin-1'))
3446 msg.set_param('title', 'test')
3447 self.assertEqual(msg.get_param('title'), 'test')
3448
3449 def test_del_rfc2231_params_with_8bit(self):
3450 msg = email.message_from_bytes(textwrap.dedent("""\
3451 Content-Type: text/plain; charset=us-ascii;
3452 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3453 ).encode('latin-1'))
3454 msg.del_param('title')
3455 self.assertEqual(msg.get_param('title'), None)
3456 self.assertEqual(msg.get_content_maintype(), 'text')
3457
3458 def test_get_payload_with_8bit_cte_header(self):
3459 msg = email.message_from_bytes(textwrap.dedent("""\
3460 Content-Transfer-Encoding: b\xa7se64
3461 Content-Type: text/plain; charset=latin-1
3462
3463 payload
3464 """).encode('latin-1'))
3465 self.assertEqual(msg.get_payload(), 'payload\n')
3466 self.assertEqual(msg.get_payload(decode=True), b'payload\n')
3467
R. David Murray96fd54e2010-10-08 15:55:28 +00003468 non_latin_bin_msg = textwrap.dedent("""\
3469 From: foo@bar.com
3470 To: báz
3471 Subject: Maintenant je vous présente mon collègue, le pouf célèbre
3472 \tJean de Baddie
3473 Mime-Version: 1.0
3474 Content-Type: text/plain; charset="utf-8"
3475 Content-Transfer-Encoding: 8bit
3476
3477 Да, они летят.
3478 """).encode('utf-8')
3479
3480 def test_bytes_generator(self):
3481 msg = email.message_from_bytes(self.non_latin_bin_msg)
3482 out = BytesIO()
3483 email.generator.BytesGenerator(out).flatten(msg)
3484 self.assertEqual(out.getvalue(), self.non_latin_bin_msg)
3485
R. David Murray7372a072011-01-26 21:21:32 +00003486 def test_bytes_generator_handles_None_body(self):
3487 #Issue 11019
3488 msg = email.message.Message()
3489 out = BytesIO()
3490 email.generator.BytesGenerator(out).flatten(msg)
3491 self.assertEqual(out.getvalue(), b"\n")
3492
R. David Murray92532142011-01-07 23:25:30 +00003493 non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\
R. David Murray96fd54e2010-10-08 15:55:28 +00003494 From: foo@bar.com
R. David Murray92532142011-01-07 23:25:30 +00003495 To: =?unknown-8bit?q?b=C3=A1z?=
3496 Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=
3497 =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=
3498 =?unknown-8bit?q?_Jean_de_Baddie?=
R. David Murray96fd54e2010-10-08 15:55:28 +00003499 Mime-Version: 1.0
3500 Content-Type: text/plain; charset="utf-8"
3501 Content-Transfer-Encoding: base64
3502
3503 0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
3504 """)
3505
def test_generator_handles_8bit(self):
    """The text Generator emits the wrapped 7bit form of the 8bit message."""
    parsed = email.message_from_bytes(self.non_latin_bin_msg)
    sink = StringIO()
    generator = email.generator.Generator(sink)
    generator.flatten(parsed)
    self.assertEqual(sink.getvalue(), self.non_latin_bin_msg_as7bit_wrapped)
R. David Murray96fd54e2010-10-08 15:55:28 +00003511
3512 def test_bytes_generator_with_unix_from(self):
3513 # The unixfrom contains a current date, so we can't check it
3514 # literally. Just make sure the first word is 'From' and the
3515 # rest of the message matches the input.
3516 msg = email.message_from_bytes(self.non_latin_bin_msg)
3517 out = BytesIO()
3518 email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
3519 lines = out.getvalue().split(b'\n')
3520 self.assertEqual(lines[0].split()[0], b'From')
3521 self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)
3522
R. David Murray92532142011-01-07 23:25:30 +00003523 non_latin_bin_msg_as7bit = non_latin_bin_msg_as7bit_wrapped.split('\n')
3524 non_latin_bin_msg_as7bit[2:4] = [
3525 'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
3526 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
3527 non_latin_bin_msg_as7bit = '\n'.join(non_latin_bin_msg_as7bit)
3528
def test_message_from_binary_file(self):
    """BytesParser reads the 8bit message from a file opened in binary mode."""
    path = 'test.msg'
    # Register removal before the file exists so a failed write still
    # cleans up.
    self.addCleanup(unlink, path)
    with open(path, 'wb') as sink:
        sink.write(self.non_latin_bin_msg)
    with open(path, 'rb') as source:
        parsed = email.parser.BytesParser().parse(source)
    self.assertEqual(str(parsed), self.non_latin_bin_msg_as7bit)
3537
3538 latin_bin_msg = textwrap.dedent("""\
3539 From: foo@bar.com
3540 To: Dinsdale
3541 Subject: Nudge nudge, wink, wink
3542 Mime-Version: 1.0
3543 Content-Type: text/plain; charset="latin-1"
3544 Content-Transfer-Encoding: 8bit
3545
3546 oh là là, know what I mean, know what I mean?
3547 """).encode('latin-1')
3548
3549 latin_bin_msg_as7bit = textwrap.dedent("""\
3550 From: foo@bar.com
3551 To: Dinsdale
3552 Subject: Nudge nudge, wink, wink
3553 Mime-Version: 1.0
3554 Content-Type: text/plain; charset="iso-8859-1"
3555 Content-Transfer-Encoding: quoted-printable
3556
3557 oh l=E0 l=E0, know what I mean, know what I mean?
3558 """)
3559
3560 def test_string_generator_reencodes_to_quopri_when_appropriate(self):
3561 m = email.message_from_bytes(self.latin_bin_msg)
3562 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3563
3564 def test_decoded_generator_emits_unicode_body(self):
3565 m = email.message_from_bytes(self.latin_bin_msg)
3566 out = StringIO()
3567 email.generator.DecodedGenerator(out).flatten(m)
3568 #DecodedHeader output contains an extra blank line compared
3569 #to the input message. RDM: not sure if this is a bug or not,
3570 #but it is not specific to the 8bit->7bit conversion.
3571 self.assertEqual(out.getvalue(),
3572 self.latin_bin_msg.decode('latin-1')+'\n')
3573
3574 def test_bytes_feedparser(self):
3575 bfp = email.feedparser.BytesFeedParser()
3576 for i in range(0, len(self.latin_bin_msg), 10):
3577 bfp.feed(self.latin_bin_msg[i:i+10])
3578 m = bfp.close()
3579 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3580
def test_crlf_flatten(self):
    """Flattening msg_26.txt to bytes with CRLF must reproduce the file."""
    with openfile('msg_26.txt', 'rb') as fp:
        original = fp.read()
    parsed = email.message_from_bytes(original)
    sink = BytesIO()
    email.generator.BytesGenerator(sink).flatten(parsed, linesep='\r\n')
    self.assertEqual(sink.getvalue(), original)
R David Murrayc5c14722011-04-06 08:13:02 -04003589
3590 def test_8bit_multipart(self):
3591 # Issue 11605
3592 source = textwrap.dedent("""\
3593 Date: Fri, 18 Mar 2011 17:15:43 +0100
3594 To: foo@example.com
3595 From: foodwatch-Newsletter <bar@example.com>
3596 Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System
3597 Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain>
3598 MIME-Version: 1.0
3599 Content-Type: multipart/alternative;
3600 boundary="b1_76a486bee62b0d200f33dc2ca08220ad"
3601
3602 --b1_76a486bee62b0d200f33dc2ca08220ad
3603 Content-Type: text/plain; charset="utf-8"
3604 Content-Transfer-Encoding: 8bit
3605
3606 Guten Tag, ,
3607
3608 mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die
3609 Nachrichten aus Japan.
3610
3611
3612 --b1_76a486bee62b0d200f33dc2ca08220ad
3613 Content-Type: text/html; charset="utf-8"
3614 Content-Transfer-Encoding: 8bit
3615
3616 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
3617 "http://www.w3.org/TR/html4/loose.dtd">
3618 <html lang="de">
3619 <head>
3620 <title>foodwatch - Newsletter</title>
3621 </head>
3622 <body>
3623 <p>mit gro&szlig;er Betroffenheit verfolgen auch wir im foodwatch-Team
3624 die Nachrichten aus Japan.</p>
3625 </body>
3626 </html>
3627 --b1_76a486bee62b0d200f33dc2ca08220ad--
3628
3629 """).encode('utf-8')
3630 msg = email.message_from_bytes(source)
3631 s = BytesIO()
3632 g = email.generator.BytesGenerator(s)
3633 g.flatten(msg)
3634 self.assertEqual(s.getvalue(), source)
3635
R David Murray9fd170e2012-03-14 14:05:03 -04003636 def test_bytes_generator_b_encoding_linesep(self):
3637 # Issue 14062: b encoding was tacking on an extra \n.
3638 m = Message()
3639 # This has enough non-ascii that it should always end up b encoded.
3640 m['Subject'] = Header('žluťoučký kůň')
3641 s = BytesIO()
3642 g = email.generator.BytesGenerator(s)
3643 g.flatten(m, linesep='\r\n')
3644 self.assertEqual(
3645 s.getvalue(),
3646 b'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
3647
3648 def test_generator_b_encoding_linesep(self):
3649 # Since this broke in ByteGenerator, test Generator for completeness.
3650 m = Message()
3651 # This has enough non-ascii that it should always end up b encoded.
3652 m['Subject'] = Header('žluťoučký kůň')
3653 s = StringIO()
3654 g = email.generator.Generator(s)
3655 g.flatten(m, linesep='\r\n')
3656 self.assertEqual(
3657 s.getvalue(),
3658 'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
3659
R. David Murray8451c4b2010-10-23 22:19:56 +00003660 maxDiff = None
3661
Ezio Melottib3aedd42010-11-20 19:04:17 +00003662
class BaseTestBytesGeneratorIdempotent:
    """Mixin routing idempotency checks through the bytes parser/generator.

    The methods read ``linesep``, ``blinesep`` and
    ``normalize_linesep_regex`` from ``self``; none of them is defined here.
    """

    maxDiff = None

    def _msgobj(self, filename):
        """Return ``(message, data)`` for *filename*, line endings normalized."""
        with openfile(filename, 'rb') as fp:
            raw = fp.read()
        # Rewrite line endings first so the parser and the later comparison
        # both see the same bytes.
        normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(normalized), normalized

    def _idempotent(self, msg, data, unixfrom=False):
        """Assert that flattening *msg* to bytes gives back *data*."""
        sink = BytesIO()
        generator = email.generator.BytesGenerator(sink, maxheaderlen=0)
        generator.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertEqual(data, sink.getvalue())
R. David Murray96fd54e2010-10-08 15:55:28 +00003679
3680
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                     TestIdempotent):
    # Bare-LF configuration for the bytes idempotency mixin.
    # Line separator passed to flatten().
    linesep = '\n'
    # Replacement used when normalizing the raw file data.
    blinesep = b'\n'
    # Matches every CRLF in the raw data.
    normalize_linesep_regex = re.compile(br'\r\n')
3686
3687
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    # CRLF configuration for the bytes idempotency mixin.
    # Line separator passed to flatten().
    linesep = '\r\n'
    # Replacement used when normalizing the raw file data.
    blinesep = b'\r\n'
    # Matches an LF that is not already preceded by a CR.
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
3693
Ezio Melottib3aedd42010-11-20 19:04:17 +00003694
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003695class TestBase64(unittest.TestCase):
3696 def test_len(self):
3697 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00003698 eq(base64mime.header_length('hello'),
Martin v. Löwis15b16a32008-12-02 06:00:15 +00003699 len(base64mime.body_encode(b'hello', eol='')))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003700 for size in range(15):
3701 if size == 0 : bsize = 0
3702 elif size <= 3 : bsize = 4
3703 elif size <= 6 : bsize = 8
3704 elif size <= 9 : bsize = 12
3705 elif size <= 12: bsize = 16
3706 else : bsize = 20
Guido van Rossum9604e662007-08-30 03:46:43 +00003707 eq(base64mime.header_length('x' * size), bsize)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003708
3709 def test_decode(self):
3710 eq = self.assertEqual
Barry Warsaw2cc1f6d2007-08-30 14:28:55 +00003711 eq(base64mime.decode(''), b'')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003712 eq(base64mime.decode('aGVsbG8='), b'hello')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003713
def test_encode(self):
    # Checks body_encode() on empty input, short input, and input long
    # enough to wrap, with the default and an explicit eol.
    eq = self.assertEqual
    # Empty input is expected back as bytes; every other case expects str.
    eq(base64mime.body_encode(b''), b'')
    eq(base64mime.body_encode(b'hello'), 'aGVsbG8=\n')
    # Test the binary flag
    # NOTE(review): no flag is passed here; the input just ends in a newline.
    eq(base64mime.body_encode(b'hello\n'), 'aGVsbG8K\n')
    # Test the maxlinelen arg
    eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40), """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IA==
""")
    # Test the eol argument
    # Each expected line ends in an explicit \r followed by the literal's
    # own newline, i.e. CRLF.
    eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'),
       """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IA==\r
""")
3735
3736 def test_header_encode(self):
3737 eq = self.assertEqual
3738 he = base64mime.header_encode
3739 eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
Guido van Rossum9604e662007-08-30 03:46:43 +00003740 eq(he('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
3741 eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003742 # Test the charset option
3743 eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=')
3744 eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003745
3746
Ezio Melottib3aedd42010-11-20 19:04:17 +00003747
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003748class TestQuopri(unittest.TestCase):
def setUp(self):
    """Build the literal and must-encode octet tables used by the tests."""
    every_octet = range(256)
    # Octets (as integers) that don't need to be encoded in headers.
    self.hlit = list(chain(
        range(ord('a'), ord('z') + 1),
        range(ord('A'), ord('Z') + 1),
        range(ord('0'), ord('9') + 1),
        b'!*+-/'))
    # Octets that do need to be encoded in headers.
    self.hnon = [octet for octet in every_octet if octet not in self.hlit]
    assert len(self.hlit) + len(self.hnon) == 256
    # Octets that don't need to be encoded in bodies: space through '~'
    # without '=', followed by tab.
    self.blit = [octet for octet in range(ord(' '), ord('~') + 1)
                 if octet != ord('=')]
    self.blit.append(ord('\t'))
    # Octets that do need to be encoded in bodies.
    self.bnon = [octet for octet in every_octet if octet not in self.blit]
    assert len(self.blit) + len(self.bnon) == 256
3770
Guido van Rossum9604e662007-08-30 03:46:43 +00003771 def test_quopri_header_check(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003772 for c in self.hlit:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003773 self.assertFalse(quoprimime.header_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003774 'Should not be header quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003775 for c in self.hnon:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003776 self.assertTrue(quoprimime.header_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003777 'Should be header quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003778
Guido van Rossum9604e662007-08-30 03:46:43 +00003779 def test_quopri_body_check(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003780 for c in self.blit:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003781 self.assertFalse(quoprimime.body_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003782 'Should not be body quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003783 for c in self.bnon:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003784 self.assertTrue(quoprimime.body_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003785 'Should be body quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003786
3787 def test_header_quopri_len(self):
3788 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00003789 eq(quoprimime.header_length(b'hello'), 5)
3790 # RFC 2047 chrome is not included in header_length().
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003791 eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
Guido van Rossum9604e662007-08-30 03:46:43 +00003792 quoprimime.header_length(b'hello') +
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003793 # =?xxx?q?...?= means 10 extra characters
3794 10)
Guido van Rossum9604e662007-08-30 03:46:43 +00003795 eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
3796 # RFC 2047 chrome is not included in header_length().
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003797 eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
Guido van Rossum9604e662007-08-30 03:46:43 +00003798 quoprimime.header_length(b'h@e@l@l@o@') +
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003799 # =?xxx?q?...?= means 10 extra characters
3800 10)
3801 for c in self.hlit:
Guido van Rossum9604e662007-08-30 03:46:43 +00003802 eq(quoprimime.header_length(bytes([c])), 1,
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003803 'expected length 1 for %r' % chr(c))
3804 for c in self.hnon:
Guido van Rossum9604e662007-08-30 03:46:43 +00003805 # Space is special; it's encoded to _
3806 if c == ord(' '):
3807 continue
3808 eq(quoprimime.header_length(bytes([c])), 3,
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003809 'expected length 3 for %r' % chr(c))
Guido van Rossum9604e662007-08-30 03:46:43 +00003810 eq(quoprimime.header_length(b' '), 1)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003811
3812 def test_body_quopri_len(self):
3813 eq = self.assertEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003814 for c in self.blit:
Guido van Rossum9604e662007-08-30 03:46:43 +00003815 eq(quoprimime.body_length(bytes([c])), 1)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003816 for c in self.bnon:
Guido van Rossum9604e662007-08-30 03:46:43 +00003817 eq(quoprimime.body_length(bytes([c])), 3)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003818
3819 def test_quote_unquote_idempotent(self):
3820 for x in range(256):
3821 c = chr(x)
3822 self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)
3823
R David Murrayec1b5b82011-03-23 14:19:05 -04003824 def _test_header_encode(self, header, expected_encoded_header, charset=None):
3825 if charset is None:
3826 encoded_header = quoprimime.header_encode(header)
3827 else:
3828 encoded_header = quoprimime.header_encode(header, charset)
3829 self.assertEqual(encoded_header, expected_encoded_header)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003830
R David Murraycafd79d2011-03-23 15:25:55 -04003831 def test_header_encode_null(self):
3832 self._test_header_encode(b'', '')
3833
R David Murrayec1b5b82011-03-23 14:19:05 -04003834 def test_header_encode_one_word(self):
3835 self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=')
3836
3837 def test_header_encode_two_lines(self):
3838 self._test_header_encode(b'hello\nworld',
3839 '=?iso-8859-1?q?hello=0Aworld?=')
3840
3841 def test_header_encode_non_ascii(self):
3842 self._test_header_encode(b'hello\xc7there',
3843 '=?iso-8859-1?q?hello=C7there?=')
3844
3845 def test_header_encode_alt_charset(self):
3846 self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
3847 charset='iso-8859-2')
3848
3849 def _test_header_decode(self, encoded_header, expected_decoded_header):
3850 decoded_header = quoprimime.header_decode(encoded_header)
3851 self.assertEqual(decoded_header, expected_decoded_header)
3852
3853 def test_header_decode_null(self):
3854 self._test_header_decode('', '')
3855
3856 def test_header_decode_one_word(self):
3857 self._test_header_decode('hello', 'hello')
3858
3859 def test_header_decode_two_lines(self):
3860 self._test_header_decode('hello=0Aworld', 'hello\nworld')
3861
3862 def test_header_decode_non_ascii(self):
3863 self._test_header_decode('hello=C7there', 'hello\xc7there')
3864
3865 def _test_decode(self, encoded, expected_decoded, eol=None):
3866 if eol is None:
3867 decoded = quoprimime.decode(encoded)
3868 else:
3869 decoded = quoprimime.decode(encoded, eol=eol)
3870 self.assertEqual(decoded, expected_decoded)
3871
3872 def test_decode_null_word(self):
3873 self._test_decode('', '')
3874
3875 def test_decode_null_line_null_word(self):
3876 self._test_decode('\r\n', '\n')
3877
3878 def test_decode_one_word(self):
3879 self._test_decode('hello', 'hello')
3880
3881 def test_decode_one_word_eol(self):
3882 self._test_decode('hello', 'hello', eol='X')
3883
3884 def test_decode_one_line(self):
3885 self._test_decode('hello\r\n', 'hello\n')
3886
3887 def test_decode_one_line_lf(self):
3888 self._test_decode('hello\n', 'hello\n')
3889
R David Murraycafd79d2011-03-23 15:25:55 -04003890 def test_decode_one_line_cr(self):
3891 self._test_decode('hello\r', 'hello\n')
3892
3893 def test_decode_one_line_nl(self):
3894 self._test_decode('hello\n', 'helloX', eol='X')
3895
3896 def test_decode_one_line_crnl(self):
3897 self._test_decode('hello\r\n', 'helloX', eol='X')
3898
R David Murrayec1b5b82011-03-23 14:19:05 -04003899 def test_decode_one_line_one_word(self):
3900 self._test_decode('hello\r\nworld', 'hello\nworld')
3901
3902 def test_decode_one_line_one_word_eol(self):
3903 self._test_decode('hello\r\nworld', 'helloXworld', eol='X')
3904
3905 def test_decode_two_lines(self):
3906 self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n')
3907
R David Murraycafd79d2011-03-23 15:25:55 -04003908 def test_decode_two_lines_eol(self):
3909 self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X')
3910
R David Murrayec1b5b82011-03-23 14:19:05 -04003911 def test_decode_one_long_line(self):
3912 self._test_decode('Spam' * 250, 'Spam' * 250)
3913
3914 def test_decode_one_space(self):
3915 self._test_decode(' ', '')
3916
3917 def test_decode_multiple_spaces(self):
3918 self._test_decode(' ' * 5, '')
3919
3920 def test_decode_one_line_trailing_spaces(self):
3921 self._test_decode('hello \r\n', 'hello\n')
3922
3923 def test_decode_two_lines_trailing_spaces(self):
3924 self._test_decode('hello \r\nworld \r\n', 'hello\nworld\n')
3925
3926 def test_decode_quoted_word(self):
3927 self._test_decode('=22quoted=20words=22', '"quoted words"')
3928
3929 def test_decode_uppercase_quoting(self):
3930 self._test_decode('ab=CD=EF', 'ab\xcd\xef')
3931
3932 def test_decode_lowercase_quoting(self):
3933 self._test_decode('ab=cd=ef', 'ab\xcd\xef')
3934
3935 def test_decode_soft_line_break(self):
3936 self._test_decode('soft line=\r\nbreak', 'soft linebreak')
3937
3938 def test_decode_false_quoting(self):
3939 self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2')
3940
3941 def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
3942 kwargs = {}
3943 if maxlinelen is None:
3944 # Use body_encode's default.
3945 maxlinelen = 76
3946 else:
3947 kwargs['maxlinelen'] = maxlinelen
3948 if eol is None:
3949 # Use body_encode's default.
3950 eol = '\n'
3951 else:
3952 kwargs['eol'] = eol
3953 encoded_body = quoprimime.body_encode(body, **kwargs)
3954 self.assertEqual(encoded_body, expected_encoded_body)
3955 if eol == '\n' or eol == '\r\n':
3956 # We know how to split the result back into lines, so maxlinelen
3957 # can be checked.
3958 for line in encoded_body.splitlines():
3959 self.assertLessEqual(len(line), maxlinelen)
3960
3961 def test_encode_null(self):
3962 self._test_encode('', '')
3963
3964 def test_encode_null_lines(self):
3965 self._test_encode('\n\n', '\n\n')
3966
3967 def test_encode_one_line(self):
3968 self._test_encode('hello\n', 'hello\n')
3969
3970 def test_encode_one_line_crlf(self):
3971 self._test_encode('hello\r\n', 'hello\n')
3972
3973 def test_encode_one_line_eol(self):
3974 self._test_encode('hello\n', 'hello\r\n', eol='\r\n')
3975
3976 def test_encode_one_space(self):
3977 self._test_encode(' ', '=20')
3978
3979 def test_encode_one_line_one_space(self):
3980 self._test_encode(' \n', '=20\n')
3981
# XXX: body_encode() expects strings, but uses ord(char) from these strings
# to index into a 256-entry list. For code points above 255, this will fail.
# Should there be a check for 8-bit only ord() values in body, or at least
# a comment about the expected input?
3986
3987 def test_encode_two_lines_one_space(self):
3988 self._test_encode(' \n \n', '=20\n=20\n')
3989
R David Murrayec1b5b82011-03-23 14:19:05 -04003990 def test_encode_one_word_trailing_spaces(self):
3991 self._test_encode('hello ', 'hello =20')
3992
3993 def test_encode_one_line_trailing_spaces(self):
3994 self._test_encode('hello \n', 'hello =20\n')
3995
3996 def test_encode_one_word_trailing_tab(self):
3997 self._test_encode('hello \t', 'hello =09')
3998
3999 def test_encode_one_line_trailing_tab(self):
4000 self._test_encode('hello \t\n', 'hello =09\n')
4001
4002 def test_encode_trailing_space_before_maxlinelen(self):
4003 self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6)
4004
R David Murrayb938c8c2011-03-24 12:19:26 -04004005 def test_encode_trailing_space_at_maxlinelen(self):
4006 self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5)
4007
R David Murrayec1b5b82011-03-23 14:19:05 -04004008 def test_encode_trailing_space_beyond_maxlinelen(self):
R David Murrayb938c8c2011-03-24 12:19:26 -04004009 self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4)
4010
4011 def test_encode_whitespace_lines(self):
4012 self._test_encode(' \n' * 5, '=20\n' * 5)
R David Murrayec1b5b82011-03-23 14:19:05 -04004013
4014 def test_encode_quoted_equals(self):
4015 self._test_encode('a = b', 'a =3D b')
4016
4017 def test_encode_one_long_string(self):
4018 self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25)
4019
4020 def test_encode_one_long_line(self):
4021 self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')
4022
4023 def test_encode_one_very_long_line(self):
4024 self._test_encode('x' * 200 + '\n',
4025 2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n')
4026
4027 def test_encode_one_long_line(self):
4028 self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')
4029
4030 def test_encode_shortest_maxlinelen(self):
4031 self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004032
R David Murrayb938c8c2011-03-24 12:19:26 -04004033 def test_encode_maxlinelen_too_small(self):
4034 self.assertRaises(ValueError, self._test_encode, '', '', maxlinelen=3)
4035
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004036 def test_encode(self):
4037 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00004038 eq(quoprimime.body_encode(''), '')
4039 eq(quoprimime.body_encode('hello'), 'hello')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004040 # Test the binary flag
Guido van Rossum9604e662007-08-30 03:46:43 +00004041 eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004042 # Test the maxlinelen arg
Guido van Rossum9604e662007-08-30 03:46:43 +00004043 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004044xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
4045 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
4046x xxxx xxxx xxxx xxxx=20""")
4047 # Test the eol argument
Guido van Rossum9604e662007-08-30 03:46:43 +00004048 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
4049 """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004050xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
4051 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
4052x xxxx xxxx xxxx xxxx=20""")
Guido van Rossum9604e662007-08-30 03:46:43 +00004053 eq(quoprimime.body_encode("""\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004054one line
4055
4056two line"""), """\
4057one line
4058
4059two line""")
4060
4061
Ezio Melottib3aedd42010-11-20 19:04:17 +00004062
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004063# Test the Charset class
class TestCharset(unittest.TestCase):
    """Tests for header and body encoding through email.charset.Charset."""

    def tearDown(self):
        """Remove the 'fake' charset entry if a test registered it."""
        from email import charset as CharsetModule
        CharsetModule.CHARSETS.pop('fake', None)

    def test_codec_encodeable(self):
        """header_encode() leaves ASCII alone and rejects unencodable text."""
        ascii_charset = Charset('us-ascii')
        # Make sure us-ascii = no Unicode conversion
        self.assertEqual(ascii_charset.header_encode('Hello World!'),
                         'Hello World!')
        # Text outside us-ascii must raise instead of passing through.
        text = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
        with self.assertRaises(UnicodeError):
            ascii_charset.header_encode(text)
        utf8_charset = Charset('utf-8')
        self.assertEqual(utf8_charset.header_encode(text),
                         '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')

    def test_body_encode(self):
        """body_encode() follows the body encoding registered for the charset."""
        check = self.assertEqual
        # Try a charset with QP body encoding
        check('hello w=F6rld',
              Charset('iso-8859-1').body_encode('hello w\xf6rld'))
        # Try a charset with Base64 body encoding
        check('aGVsbG8gd29ybGQ=\n',
              Charset('utf-8').body_encode(b'hello world'))
        # Try a charset with None body encoding
        check('hello world', Charset('us-ascii').body_encode('hello world'))
        # Try the convert argument, where input codec != output codec
        c = Charset('euc-jp')
        # With apologies to Tokio Kikuchi ;)
        # XXX FIXME: the assertions below are disabled.
##         try:
##             eq('\x1b$B5FCO;~IW\x1b(B',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
##             eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
##         except LookupError:
##             # We probably don't have the Japanese codecs installed
##             pass
        # Testing SF bug #625509, which we have to fake, since there are no
        # built-in encodings where the header encoding is QP but the body
        # encoding is not.
        from email import charset as CharsetModule
        CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8')
        c = Charset('fake')
        check('hello world', c.body_encode('hello world'))

    def test_unicode_charset_name(self):
        """str() gives the charset name; a non-ASCII name is rejected."""
        self.assertEqual(str(Charset('us-ascii')), 'us-ascii')
        with self.assertRaises(errors.CharsetError):
            Charset('asc\xffii')
4118
4119
Ezio Melottib3aedd42010-11-20 19:04:17 +00004120
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004121# Test multilingual MIME headers.
class TestHeader(TestEmailBase):
    """Tests for email.header.Header, decode_header() and make_header()."""

    def test_simple(self):
        """Appended text keeps its own leading space after the joining space."""
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        h.append(' Goodbye World!')
        # One space joins the two chunks and the appended chunk brings its
        # own leading space, so two spaces are expected here.
        eq(h.encode(), 'Hello World!' + '  ' + 'Goodbye World!')

    def test_simple_surprise(self):
        """Appended text without leading space still gets a joining space."""
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        h.append('Goodbye World!')
        eq(h.encode(), 'Hello World! Goodbye World!')

    def test_header_needs_no_decoding(self):
        """decode_header() returns plain text as one chunk with no charset."""
        h = 'no decoding needed'
        self.assertEqual(decode_header(h), [(h, None)])

    def test_long(self):
        """Every folded line of a long header fits within maxlinelen."""
        h = Header(
            "I am the very model of a modern Major-General; "
            "I've information vegetable, animal, and mineral; "
            "I know the kings of England, and I quote the fights historical "
            "from Marathon to Waterloo, in order categorical; "
            "I'm very well acquainted, too, with matters mathematical; "
            "I understand equations, both the simple and quadratical; "
            "about binomial theorem I'm teeming with a lot o' news, "
            "with many cheerful facts about the square of the hypotenuse.",
            maxlinelen=76)
        for line in h.encode(splitchars=' ').split('\n '):
            self.assertLessEqual(len(line), 76)

    def test_multilingual(self):
        """Chunks in three charsets encode, decode and round-trip."""
        eq = self.ndiffAssertEqual
        g = Charset("iso-8859-1")
        cz = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        g_head = (b'Die Mieter treten hier ein werden mit einem '
                  b'Foerderband komfortabel den Korridor entlang, '
                  b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
                  b'gegen die rotierenden Klingen bef\xf6rdert. ')
        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        h = Header(g_head, g)
        h.append(cz_head, cz)
        h.append(utf8_head, utf8)
        enc = h.encode(maxlinelen=76)
        eq(enc, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
        decoded = decode_header(enc)
        eq(len(decoded), 3)
        eq(decoded[0], (g_head, 'iso-8859-1'))
        eq(decoded[1], (cz_head, 'iso-8859-2'))
        eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
        ustr = str(h)
        eq(ustr,
           (b'Die Mieter treten hier ein werden mit einem Foerderband '
            b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
            b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
            b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
            b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
            b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
            b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
            b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
            b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
            b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
            b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
            b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
            b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
            b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
            b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
            b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
            ).decode('utf-8'))
        # Test make_header()
        newh = make_header(decode_header(enc))
        eq(newh, h)

    def test_empty_header_encode(self):
        """An empty Header encodes to an empty string."""
        h = Header()
        self.assertEqual(h.encode(), '')

    def test_header_ctor_default_args(self):
        """A Header built with no arguments compares equal to ''."""
        eq = self.ndiffAssertEqual
        h = Header()
        eq(h, '')
        h.append('foo', Charset('iso-8859-1'))
        eq(h, 'foo')

    def test_explicit_maxlinelen(self):
        """Folding depends on header_name and on an explicit maxlinelen."""
        eq = self.ndiffAssertEqual
        hstr = ('A very long line that must get split to something other '
                'than at the 76th character boundary to test the non-default '
                'behavior')
        h = Header(hstr)
        eq(h.encode(), '''\
A very long line that must get split to something other than at the 76th
 character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        h = Header(hstr, header_name='Subject')
        eq(h.encode(), '''\
A very long line that must get split to something other than at the
 76th character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        h = Header(hstr, maxlinelen=1024, header_name='Subject')
        eq(h.encode(), hstr)
        eq(str(h), hstr)

    def test_quopri_splittable(self):
        """A 'q' encoded header is split to fit maxlinelen and round-trips."""
        eq = self.ndiffAssertEqual
        h = Header(charset='iso-8859-1', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_?=""")
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='iso-8859-1', maxlinelen=40)
        h.append('xxxx ' * 20)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
 =?iso-8859-1?q?_xxxx_xxxx_?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_base64_splittable(self):
        """A 'b' encoded header is split to fit maxlinelen and round-trips."""
        eq = self.ndiffAssertEqual
        h = Header(charset='koi8-r', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IA==?=""")
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='koi8-r', maxlinelen=40)
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_us_ascii_header(self):
        """Plain ASCII survives decode_header() and make_header() unchanged."""
        eq = self.assertEqual
        s = 'hello'
        x = decode_header(s)
        eq(x, [('hello', None)])
        h = make_header(x)
        eq(s, h.encode())

    def test_string_charset(self):
        """append() accepts the charset as a plain string name."""
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        eq(h, 'hello')

##    def test_unicode_error(self):
##        raises = self.assertRaises
##        raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
##        h = Header()
##        raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')

    def test_utf8_shortest(self):
        """utf-8 headers use whichever of 'q' and 'b' encoding is expected."""
        eq = self.assertEqual
        h = Header('p\xf6stal', 'utf-8')
        eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
        h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
        eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')

    def test_bad_8bit_header(self):
        """Undecodable bytes raise unless errors='replace' is given."""
        raises = self.assertRaises
        eq = self.assertEqual
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        raises(UnicodeError, Header, x)
        h = Header()
        raises(UnicodeError, h.append, x)
        e = x.decode('utf-8', 'replace')
        eq(str(Header(x, errors='replace')), e)
        h.append(x, errors='replace')
        eq(str(h), e)

    def test_escaped_8bit_header(self):
        """Surrogate-escaped text with the unknown-8bit charset round-trips."""
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        e = x.decode('ascii', 'surrogateescape')
        h = Header(e, charset=email.charset.UNKNOWN8BIT)
        self.assertEqual(str(h),
                        'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])

    def test_header_handles_binary_unknown8bit(self):
        """Raw bytes with the unknown-8bit charset round-trip."""
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        h = Header(x, charset=email.charset.UNKNOWN8BIT)
        self.assertEqual(str(h),
                        'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])

    def test_make_header_handles_binary_unknown8bit(self):
        """make_header() accepts unknown-8bit chunks from decode_header()."""
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        h = Header(x, charset=email.charset.UNKNOWN8BIT)
        h2 = email.header.make_header(email.header.decode_header(h))
        self.assertEqual(str(h2),
                        'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h2), [(x, 'unknown-8bit')])

    def test_modify_returned_list_does_not_change_header(self):
        """Mutating the list from decode_header() leaves the Header alone."""
        h = Header('test')
        chunks = email.header.decode_header(h)
        # Chunks are (string, charset) pairs.
        chunks.append(('test2', 'ascii'))
        self.assertEqual(str(h), 'test')

    def test_encoded_adjacent_nonencoded(self):
        """An encoded word followed by plain text round-trips."""
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        h.append('world')
        s = h.encode()
        eq(s, '=?iso-8859-1?q?hello?= world')
        h = make_header(decode_header(s))
        eq(h.encode(), s)

    def test_whitespace_keeper(self):
        """Whitespace around encoded words is kept on the plain-text chunks."""
        eq = self.assertEqual
        s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
        parts = decode_header(s)
        eq(parts, [(b'Subject: ', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b' zz.', None)])
        hdr = make_header(parts)
        eq(hdr.encode(),
           'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')

    def test_broken_base64_header(self):
        """A malformed base64 encoded word raises HeaderParseError."""
        raises = self.assertRaises
        s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
        raises(errors.HeaderParseError, decode_header, s)

    def test_shift_jis_charset(self):
        """A shift_jis header is emitted as iso-2022-jp."""
        h = Header('文', charset='shift_jis')
        self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')

    def test_flatten_header_with_no_value(self):
        """A header with no value flattens with a space after the colon."""
        # Issue 11401 (regression from email 4.x) Note that the space after
        # the header doesn't reflect the input, but this is also the way
        # email 4.x behaved. At some point it would be nice to fix that.
        msg = email.message_from_string("EmptyHeader:")
        self.assertEqual(str(msg), "EmptyHeader: \n\n")

    def test_encode_preserves_leading_ws_on_value(self):
        """Leading whitespace in a header value survives flattening."""
        msg = Message()
        # Use several leading spaces so they can be told apart from the
        # single space that follows the colon.
        leading = ' ' * 3
        msg['SomeHeader'] = leading + 'value with leading ws'
        self.assertEqual(
            str(msg),
            'SomeHeader: ' + leading + 'value with leading ws\n\n')
4473
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004474
Ezio Melottib3aedd42010-11-20 19:04:17 +00004475
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004476# Test RFC 2231 header parameters (en/de)coding
4477class TestRFC2231(TestEmailBase):
def test_get_param(self):
    """get_param('title') on msg_29.txt gives a (charset, language, value) triple."""
    msg = self._msgobj('msg_29.txt')
    text = 'This is even more ***fun*** isn\'t it!'
    self.assertEqual(msg.get_param('title'), ('us-ascii', 'en', text))
    # With unquote=False the surrounding double quotes are kept.
    self.assertEqual(msg.get_param('title', unquote=False),
                     ('us-ascii', 'en', '"' + text + '"'))
4485
def test_set_param(self):
    """set_param() stores a charset/language-tagged title and flattens it."""
    check = self.ndiffAssertEqual
    title = 'This is even more ***fun*** isn\'t it!'
    msg = Message()
    msg.set_param('title', title, charset='us-ascii')
    check(msg.get_param('title'), ('us-ascii', '', title))
    msg.set_param('title', title, charset='us-ascii', language='en')
    check(msg.get_param('title'), ('us-ascii', 'en', title))
    msg = self._msgobj('msg_01.txt')
    msg.set_param('title', title, charset='us-ascii', language='en')
    # NOTE(review): the headers below are expected to mirror msg_01.txt byte
    # for byte, including any runs of spaces -- confirm against the fixture.
    check(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")
4522
R David Murraya2860e82011-04-16 09:20:30 -04004523 def test_set_param_requote(self):
4524 msg = Message()
4525 msg.set_param('title', 'foo')
4526 self.assertEqual(msg['content-type'], 'text/plain; title="foo"')
4527 msg.set_param('title', 'bar', requote=False)
4528 self.assertEqual(msg['content-type'], 'text/plain; title=bar')
4529 # tspecial is still quoted.
4530 msg.set_param('title', "(bar)bell", requote=False)
4531 self.assertEqual(msg['content-type'], 'text/plain; title="(bar)bell"')
4532
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004533 def test_del_param(self):
4534 eq = self.ndiffAssertEqual
4535 msg = self._msgobj('msg_01.txt')
4536 msg.set_param('foo', 'bar', charset='us-ascii', language='en')
4537 msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
4538 charset='us-ascii', language='en')
4539 msg.del_param('foo', header='Content-Type')
4540 eq(msg.as_string(maxheaderlen=78), """\
4541Return-Path: <bbb@zzz.org>
4542Delivered-To: bbb@zzz.org
4543Received: by mail.zzz.org (Postfix, from userid 889)
4544\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
4545MIME-Version: 1.0
4546Content-Transfer-Encoding: 7bit
4547Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
4548From: bbb@ddd.com (John X. Doe)
4549To: bbb@zzz.org
4550Subject: This is a test message
4551Date: Fri, 4 May 2001 14:05:44 -0400
4552Content-Type: text/plain; charset="us-ascii";
R. David Murraydfd7eb02010-12-24 22:36:49 +00004553 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004554
4555
4556Hi,
4557
4558Do you like this message?
4559
4560-Me
4561""")
4562
4563 def test_rfc2231_get_content_charset(self):
4564 eq = self.assertEqual
4565 msg = self._msgobj('msg_32.txt')
4566 eq(msg.get_content_charset(), 'us-ascii')
4567
R. David Murraydfd7eb02010-12-24 22:36:49 +00004568 def test_rfc2231_parse_rfc_quoting(self):
4569 m = textwrap.dedent('''\
4570 Content-Disposition: inline;
4571 \tfilename*0*=''This%20is%20even%20more%20;
4572 \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
4573 \tfilename*2="is it not.pdf"
4574
4575 ''')
4576 msg = email.message_from_string(m)
4577 self.assertEqual(msg.get_filename(),
4578 'This is even more ***fun*** is it not.pdf')
4579 self.assertEqual(m, msg.as_string())
4580
4581 def test_rfc2231_parse_extra_quoting(self):
4582 m = textwrap.dedent('''\
4583 Content-Disposition: inline;
4584 \tfilename*0*="''This%20is%20even%20more%20";
4585 \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4586 \tfilename*2="is it not.pdf"
4587
4588 ''')
4589 msg = email.message_from_string(m)
4590 self.assertEqual(msg.get_filename(),
4591 'This is even more ***fun*** is it not.pdf')
4592 self.assertEqual(m, msg.as_string())
4593
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004594 def test_rfc2231_no_language_or_charset(self):
4595 m = '''\
4596Content-Transfer-Encoding: 8bit
4597Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
4598Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm
4599
4600'''
4601 msg = email.message_from_string(m)
4602 param = msg.get_param('NAME')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00004603 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004604 self.assertEqual(
4605 param,
4606 'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')
4607
4608 def test_rfc2231_no_language_or_charset_in_filename(self):
4609 m = '''\
4610Content-Disposition: inline;
4611\tfilename*0*="''This%20is%20even%20more%20";
4612\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4613\tfilename*2="is it not.pdf"
4614
4615'''
4616 msg = email.message_from_string(m)
4617 self.assertEqual(msg.get_filename(),
4618 'This is even more ***fun*** is it not.pdf')
4619
4620 def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
4621 m = '''\
4622Content-Disposition: inline;
4623\tfilename*0*="''This%20is%20even%20more%20";
4624\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4625\tfilename*2="is it not.pdf"
4626
4627'''
4628 msg = email.message_from_string(m)
4629 self.assertEqual(msg.get_filename(),
4630 'This is even more ***fun*** is it not.pdf')
4631
4632 def test_rfc2231_partly_encoded(self):
4633 m = '''\
4634Content-Disposition: inline;
4635\tfilename*0="''This%20is%20even%20more%20";
4636\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4637\tfilename*2="is it not.pdf"
4638
4639'''
4640 msg = email.message_from_string(m)
4641 self.assertEqual(
4642 msg.get_filename(),
4643 'This%20is%20even%20more%20***fun*** is it not.pdf')
4644
4645 def test_rfc2231_partly_nonencoded(self):
4646 m = '''\
4647Content-Disposition: inline;
4648\tfilename*0="This%20is%20even%20more%20";
4649\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
4650\tfilename*2="is it not.pdf"
4651
4652'''
4653 msg = email.message_from_string(m)
4654 self.assertEqual(
4655 msg.get_filename(),
4656 'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')
4657
4658 def test_rfc2231_no_language_or_charset_in_boundary(self):
4659 m = '''\
4660Content-Type: multipart/alternative;
4661\tboundary*0*="''This%20is%20even%20more%20";
4662\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
4663\tboundary*2="is it not.pdf"
4664
4665'''
4666 msg = email.message_from_string(m)
4667 self.assertEqual(msg.get_boundary(),
4668 'This is even more ***fun*** is it not.pdf')
4669
4670 def test_rfc2231_no_language_or_charset_in_charset(self):
4671 # This is a nonsensical charset value, but tests the code anyway
4672 m = '''\
4673Content-Type: text/plain;
4674\tcharset*0*="This%20is%20even%20more%20";
4675\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
4676\tcharset*2="is it not.pdf"
4677
4678'''
4679 msg = email.message_from_string(m)
4680 self.assertEqual(msg.get_content_charset(),
4681 'this is even more ***fun*** is it not.pdf')
4682
4683 def test_rfc2231_bad_encoding_in_filename(self):
4684 m = '''\
4685Content-Disposition: inline;
4686\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
4687\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4688\tfilename*2="is it not.pdf"
4689
4690'''
4691 msg = email.message_from_string(m)
4692 self.assertEqual(msg.get_filename(),
4693 'This is even more ***fun*** is it not.pdf')
4694
4695 def test_rfc2231_bad_encoding_in_charset(self):
4696 m = """\
4697Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D
4698
4699"""
4700 msg = email.message_from_string(m)
4701 # This should return None because non-ascii characters in the charset
4702 # are not allowed.
4703 self.assertEqual(msg.get_content_charset(), None)
4704
4705 def test_rfc2231_bad_character_in_charset(self):
4706 m = """\
4707Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D
4708
4709"""
4710 msg = email.message_from_string(m)
4711 # This should return None because non-ascii characters in the charset
4712 # are not allowed.
4713 self.assertEqual(msg.get_content_charset(), None)
4714
4715 def test_rfc2231_bad_character_in_filename(self):
4716 m = '''\
4717Content-Disposition: inline;
4718\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
4719\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4720\tfilename*2*="is it not.pdf%E2"
4721
4722'''
4723 msg = email.message_from_string(m)
4724 self.assertEqual(msg.get_filename(),
4725 'This is even more ***fun*** is it not.pdf\ufffd')
4726
4727 def test_rfc2231_unknown_encoding(self):
4728 m = """\
4729Content-Transfer-Encoding: 8bit
4730Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt
4731
4732"""
4733 msg = email.message_from_string(m)
4734 self.assertEqual(msg.get_filename(), 'myfile.txt')
4735
4736 def test_rfc2231_single_tick_in_filename_extended(self):
4737 eq = self.assertEqual
4738 m = """\
4739Content-Type: application/x-foo;
4740\tname*0*=\"Frank's\"; name*1*=\" Document\"
4741
4742"""
4743 msg = email.message_from_string(m)
4744 charset, language, s = msg.get_param('name')
4745 eq(charset, None)
4746 eq(language, None)
4747 eq(s, "Frank's Document")
4748
4749 def test_rfc2231_single_tick_in_filename(self):
4750 m = """\
4751Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"
4752
4753"""
4754 msg = email.message_from_string(m)
4755 param = msg.get_param('name')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00004756 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004757 self.assertEqual(param, "Frank's Document")
4758
4759 def test_rfc2231_tick_attack_extended(self):
4760 eq = self.assertEqual
4761 m = """\
4762Content-Type: application/x-foo;
4763\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"
4764
4765"""
4766 msg = email.message_from_string(m)
4767 charset, language, s = msg.get_param('name')
4768 eq(charset, 'us-ascii')
4769 eq(language, 'en-us')
4770 eq(s, "Frank's Document")
4771
4772 def test_rfc2231_tick_attack(self):
4773 m = """\
4774Content-Type: application/x-foo;
4775\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"
4776
4777"""
4778 msg = email.message_from_string(m)
4779 param = msg.get_param('name')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00004780 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004781 self.assertEqual(param, "us-ascii'en-us'Frank's Document")
4782
4783 def test_rfc2231_no_extended_values(self):
4784 eq = self.assertEqual
4785 m = """\
4786Content-Type: application/x-foo; name=\"Frank's Document\"
4787
4788"""
4789 msg = email.message_from_string(m)
4790 eq(msg.get_param('name'), "Frank's Document")
4791
4792 def test_rfc2231_encoded_then_unencoded_segments(self):
4793 eq = self.assertEqual
4794 m = """\
4795Content-Type: application/x-foo;
4796\tname*0*=\"us-ascii'en-us'My\";
4797\tname*1=\" Document\";
4798\tname*2*=\" For You\"
4799
4800"""
4801 msg = email.message_from_string(m)
4802 charset, language, s = msg.get_param('name')
4803 eq(charset, 'us-ascii')
4804 eq(language, 'en-us')
4805 eq(s, 'My Document For You')
4806
4807 def test_rfc2231_unencoded_then_encoded_segments(self):
4808 eq = self.assertEqual
4809 m = """\
4810Content-Type: application/x-foo;
4811\tname*0=\"us-ascii'en-us'My\";
4812\tname*1*=\" Document\";
4813\tname*2*=\" For You\"
4814
4815"""
4816 msg = email.message_from_string(m)
4817 charset, language, s = msg.get_param('name')
4818 eq(charset, 'us-ascii')
4819 eq(language, 'en-us')
4820 eq(s, 'My Document For You')
4821
4822
Ezio Melottib3aedd42010-11-20 19:04:17 +00004823
R. David Murraya8f480f2010-01-16 18:30:03 +00004824# Tests to ensure that signed parts of an email are completely preserved, as
4825# required by RFC1847 section 2.1. Note that these are incomplete, because the
4826# email package does not currently always preserve the body. See issue 1670765.
class TestSigned(TestEmailBase):

    # Captures the text between the first boundary line ("--<boundary>")
    # and the next line consisting of the same "--<boundary>"; group(2) is
    # that text.  Compiled once here instead of on every comparison.
    _first_part_re = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)

    def _msg_and_obj(self, filename):
        # Return (raw text of the data file, Message parsed from that text).
        with openfile(filename) as fp:
            original = fp.read()
        msg = email.message_from_string(original)
        return original, msg

    def _signed_parts_eq(self, original, result):
        # Extract the first mime part of each message and require that the
        # two are identical.
        inmatch = self._first_part_re.search(original)
        outmatch = self._first_part_re.search(result)
        # Fail with a readable message rather than an AttributeError on
        # None if either text contains no delimited part at all.
        self.assertIsNotNone(inmatch,
                             'no MIME part found in the original message')
        self.assertIsNotNone(outmatch,
                             'no MIME part found in the generated message')
        self.assertEqual(outmatch.group(2), inmatch.group(2))

    def test_long_headers_as_string(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        # Same check with an explicit header length limit passed in.
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        # Same check, going through a Generator directly.
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)
4859
4860
Ezio Melottib3aedd42010-11-20 19:04:17 +00004861
if __name__ == '__main__':
    # Hand control to unittest's command-line runner when this file is
    # executed directly rather than imported.
    unittest.main()