blob: 5aaf526fd836b369e1ab897e1e6a806a2958ad2e [file] [log] [blame]
Benjamin Peterson46a99002010-01-09 18:45:30 +00001# Copyright (C) 2001-2010 Python Software Foundation
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002# Contact: email-sig@python.org
3# email package unit tests
4
5import os
R. David Murray719a4492010-11-21 16:53:48 +00006import re
Guido van Rossum8b3febe2007-08-30 01:15:14 +00007import sys
8import time
9import base64
10import difflib
11import unittest
12import warnings
R. David Murray96fd54e2010-10-08 15:55:28 +000013import textwrap
Guido van Rossum8b3febe2007-08-30 01:15:14 +000014
R. David Murray96fd54e2010-10-08 15:55:28 +000015from io import StringIO, BytesIO
Guido van Rossum8b3febe2007-08-30 01:15:14 +000016from itertools import chain
17
18import email
19
20from email.charset import Charset
21from email.header import Header, decode_header, make_header
22from email.parser import Parser, HeaderParser
23from email.generator import Generator, DecodedGenerator
24from email.message import Message
25from email.mime.application import MIMEApplication
26from email.mime.audio import MIMEAudio
27from email.mime.text import MIMEText
28from email.mime.image import MIMEImage
29from email.mime.base import MIMEBase
30from email.mime.message import MIMEMessage
31from email.mime.multipart import MIMEMultipart
32from email import utils
33from email import errors
34from email import encoders
35from email import iterators
36from email import base64mime
37from email import quoprimime
38
R. David Murray96fd54e2010-10-08 15:55:28 +000039from test.support import findfile, run_unittest, unlink
Guido van Rossum8b3febe2007-08-30 01:15:14 +000040from email.test import __file__ as landmark
41
42
43NL = '\n'
44EMPTYSTRING = ''
45SPACE = ' '
46
47
Ezio Melottib3aedd42010-11-20 19:04:17 +000048
def openfile(filename, *args, **kws):
    """Open *filename* from the ``data`` directory next to the test package.

    Any extra positional and keyword arguments are passed straight through
    to the built-in ``open()``.
    """
    data_dir = os.path.join(os.path.dirname(landmark), 'data')
    return open(os.path.join(data_dir, filename), *args, **kws)
52
53
Ezio Melottib3aedd42010-11-20 19:04:17 +000054
Guido van Rossum8b3febe2007-08-30 01:15:14 +000055# Base test class
class TestEmailBase(unittest.TestCase):
    """Common helpers shared by the email test case classes."""

    def ndiffAssertEqual(self, first, second):
        """Like assertEqual, but report a mismatch as a line-by-line ndiff."""
        if first != second:
            # Compare the repr() of every line so that whitespace and
            # escape differences are visible in the failure output.
            quoted = [
                [repr(line) for line in str(value).splitlines()]
                for value in (first, second)
            ]
            delta = difflib.ndiff(*quoted)
            raise self.failureException(NL + NL.join(delta))

    def _msgobj(self, filename):
        """Parse the named test data file and return the message object."""
        with openfile(findfile(filename)) as fixture:
            msg = email.message_from_file(fixture)
        return msg
70
71
Ezio Melottib3aedd42010-11-20 19:04:17 +000072
Guido van Rossum8b3febe2007-08-30 01:15:14 +000073# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    """Exercise the public API of email.message.Message.

    Some tests parse a data file through ``self._msgobj``; the others
    build ``Message`` objects (or parse short literal strings) directly.
    """

    def test_get_all(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        self.assertIsNone(msg.get_charset())
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        self.assertIsNone(msg.get_charset())
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_get_charsets(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_make_boundary(self):
        msg = MIMEMultipart('form-data')
        # Note that when the boundary gets created is an implementation
        # detail and might change.
        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
        # Trigger creation of boundary
        msg.as_string()
        self.assertEqual(msg.items()[0][1][:33],
                         'multipart/form-data; boundary="==')
        # XXX: there ought to be tests of the uniqueness of the boundary, too.

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        with openfile(findfile('msg_46.txt')) as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, True, 0)
        gen.flatten(msg, False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        self.assertIsNone(msg.get_payload(decode=True))
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            # Assigning a header appends a new field and lookups return the
            # first match, so drop the previous value; otherwise only the
            # first spelling would ever be exercised.
            del msg['content-transfer-encoding']
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_decoded_generator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        self.assertIn('from', msg)
        self.assertIn('From', msg)
        self.assertIn('FROM', msg)
        self.assertIn('to', msg)
        self.assertIn('To', msg)
        self.assertIn('TO', msg)

    def test_as_string(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        eq(text, str(msg))
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        self.assertTrue(lines[0].startswith('From '))
        eq(text, NL.join(lines[1:]))

    def test_bad_param(self):
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_filename())

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_boundary())

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        self.assertIsNone(msg.get_params())
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    def test_get_param_liberal(self):
        msg = Message()
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        self.assertIsNone(msg.get_param('quuz', header='x-header'))
        self.assertIsNone(msg.get_param('quuz'))
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    def test_get_param_with_semis_in_quotes(self):
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    def test_get_param_with_quotes(self):
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        msg = email.message_from_string('Header: exists')
        self.assertIn('header', msg)
        self.assertIn('Header', msg)
        self.assertIn('HEADER', msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        # Re-adding the parameter puts it at the end of the list.
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        # With a duplicated field name only the first occurrence is replaced.
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    def test_broken_base64_payload(self):
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    # Issue 1078919
    def test_ascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    def test_noascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer.ppt")
        self.assertEqual(
            'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
            msg['Content-Disposition'])

    def test_nonascii_add_header_via_triple(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fußballer.ppt'))
        self.assertEqual(
            'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
            msg['Content-Disposition'])

    def test_ascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="windows [filename].ppt")
        self.assertEqual(
            'attachment; filename="windows [filename].ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer [filename].ppt")
        self.assertEqual(
            "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
            msg['Content-Disposition'])

    # Issue 5871: reject an attempt to embed a header inside a header value
    # (header injection attack).
    def test_embeded_header_via_Header_rejected(self):
        msg = Message()
        msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_embeded_header_via_string_rejected(self):
        msg = Message()
        msg['Dummy'] = 'dummy\nX-Injected-Header: test'
        self.assertRaises(errors.HeaderParseError, msg.as_string)
575
Ezio Melottib3aedd42010-11-20 19:04:17 +0000576
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000577# Test the email.encoders module
class TestEncoders(unittest.TestCase):
    """Check the Content-Transfer-Encoding chosen for various payloads."""

    def test_encode_empty_payload(self):
        empty = Message()
        empty.set_charset('us-ascii')
        self.assertEqual(empty['content-transfer-encoding'], '7bit')

    def test_default_cte(self):
        # 7bit data and the default us-ascii _charset
        ascii_text = MIMEText('hello world')
        self.assertEqual(ascii_text['content-transfer-encoding'], '7bit')
        # Similar, but with 8bit data
        eight_bit_text = MIMEText('hello \xf8 world')
        self.assertEqual(eight_bit_text['content-transfer-encoding'], '8bit')
        # And now with a different charset
        latin1_text = MIMEText('hello \xf8 world', _charset='iso-8859-1')
        self.assertEqual(latin1_text['content-transfer-encoding'],
                         'quoted-printable')

    def test_encode7or8bit(self):
        # Make sure a charset whose input character set is 8bit but
        # whose output character set is 7bit gets a transfer-encoding
        # of 7bit.
        japanese_text = MIMEText('文', _charset='euc-jp')
        self.assertEqual(japanese_text['content-transfer-encoding'], '7bit')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000604
Ezio Melottib3aedd42010-11-20 19:04:17 +0000605
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000606# Test long header wrapping
607class TestLongHeaders(TestEmailBase):
608 def test_split_long_continuation(self):
609 eq = self.ndiffAssertEqual
610 msg = email.message_from_string("""\
611Subject: bug demonstration
612\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
613\tmore text
614
615test
616""")
617 sfp = StringIO()
618 g = Generator(sfp)
619 g.flatten(msg)
620 eq(sfp.getvalue(), """\
621Subject: bug demonstration
622\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
623\tmore text
624
625test
626""")
627
628 def test_another_long_almost_unsplittable_header(self):
629 eq = self.ndiffAssertEqual
630 hstr = """\
631bug demonstration
632\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
633\tmore text"""
634 h = Header(hstr, continuation_ws='\t')
635 eq(h.encode(), """\
636bug demonstration
637\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
638\tmore text""")
639 h = Header(hstr.replace('\t', ' '))
640 eq(h.encode(), """\
641bug demonstration
642 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
643 more text""")
644
645 def test_long_nonstring(self):
646 eq = self.ndiffAssertEqual
647 g = Charset("iso-8859-1")
648 cz = Charset("iso-8859-2")
649 utf8 = Charset("utf-8")
650 g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
651 b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
652 b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
653 b'bef\xf6rdert. ')
654 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
655 b'd\xf9vtipu.. ')
656 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
657 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
658 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
659 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
660 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
661 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
662 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
663 '\u3044\u307e\u3059\u3002')
664 h = Header(g_head, g, header_name='Subject')
665 h.append(cz_head, cz)
666 h.append(utf8_head, utf8)
667 msg = Message()
668 msg['Subject'] = h
669 sfp = StringIO()
670 g = Generator(sfp)
671 g.flatten(msg)
672 eq(sfp.getvalue(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000673Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
674 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
675 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
676 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
677 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
678 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
679 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
680 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
681 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
682 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
683 =?utf-8?b?44CC?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000684
685""")
Guido van Rossum9604e662007-08-30 03:46:43 +0000686 eq(h.encode(maxlinelen=76), """\
687=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
688 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
689 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
690 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
691 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
692 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
693 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
694 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
695 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
696 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
697 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000698
699 def test_long_header_encode(self):
700 eq = self.ndiffAssertEqual
701 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
702 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
703 header_name='X-Foobar-Spoink-Defrobnit')
704 eq(h.encode(), '''\
705wasnipoop; giraffes="very-long-necked-animals";
706 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
707
708 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
709 eq = self.ndiffAssertEqual
710 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
711 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
712 header_name='X-Foobar-Spoink-Defrobnit',
713 continuation_ws='\t')
714 eq(h.encode(), '''\
715wasnipoop; giraffes="very-long-necked-animals";
716 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
717
718 def test_long_header_encode_with_tab_continuation(self):
719 eq = self.ndiffAssertEqual
720 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
721 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
722 header_name='X-Foobar-Spoink-Defrobnit',
723 continuation_ws='\t')
724 eq(h.encode(), '''\
725wasnipoop; giraffes="very-long-necked-animals";
726\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
727
728 def test_header_splitter(self):
729 eq = self.ndiffAssertEqual
730 msg = MIMEText('')
731 # It'd be great if we could use add_header() here, but that doesn't
732 # guarantee an order of the parameters.
733 msg['X-Foobar-Spoink-Defrobnit'] = (
734 'wasnipoop; giraffes="very-long-necked-animals"; '
735 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
736 sfp = StringIO()
737 g = Generator(sfp)
738 g.flatten(msg)
739 eq(sfp.getvalue(), '''\
740Content-Type: text/plain; charset="us-ascii"
741MIME-Version: 1.0
742Content-Transfer-Encoding: 7bit
743X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
744 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
745
746''')
747
748 def test_no_semis_header_splitter(self):
749 eq = self.ndiffAssertEqual
750 msg = Message()
751 msg['From'] = 'test@dom.ain'
752 msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
753 msg.set_payload('Test')
754 sfp = StringIO()
755 g = Generator(sfp)
756 g.flatten(msg)
757 eq(sfp.getvalue(), """\
758From: test@dom.ain
759References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
760 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>
761
762Test""")
763
def test_no_split_long_header(self):
    # An 80-character run without whitespace offers no place to fold, so
    # it is expected to come back in one piece.
    unbroken = 'x' * 80
    prefixed = Header('References: ' + unbroken)
    # These come on two lines because Headers are really field value
    # classes and don't really know about their field names.
    self.ndiffAssertEqual(prefixed.encode(), 'References:\n ' + unbroken)
    bare = Header(unbroken)
    self.ndiffAssertEqual(bare.encode(), unbroken)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000775
def test_splitting_multiple_long_lines(self):
    # The input is the same long Received-style line three times; the
    # second and third copies start with a tab and every copy ends with a
    # newline.  Each copy is expected to fold at its semicolons.
    one_line = (
        'from babylon.socal-raves.org (localhost [127.0.0.1]); '
        'by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; '
        'for <mailman-admin@babylon.socal-raves.org>; '
        'Sat, 2 Feb 2002 17:00:06 -0800 (PST)')
    one_line_folded = (
        'from babylon.socal-raves.org (localhost [127.0.0.1]);\n'
        ' by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;\n'
        ' for <mailman-admin@babylon.socal-raves.org>;\n'
        ' Sat, 2 Feb 2002 17:00:06 -0800 (PST)')
    hstr = '\t'.join([one_line + '\n'] * 3)
    expected = '\n\t'.join([one_line_folded] * 3)
    header = Header(hstr, continuation_ws='\t')
    self.ndiffAssertEqual(header.encode(), expected)
797
def test_splitting_first_line_only_is_long(self):
    # Only the first physical line of this Received value is long; the
    # three tab-led lines after it are expected to pass through unchanged.
    short_lines = (
        '\n\tby kronos.mems-exchange.org with esmtp (Exim 4.05)'
        '\n\tid 17k4h5-00034i-00'
        '\n\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400')
    hstr = (
        'from modemcable093.139-201-24.que.mc.videotron.ca '
        '([24.201.139.93] helo=cthulhu.gerg.ca)' + short_lines)
    expected = (
        'from modemcable093.139-201-24.que.mc.videotron.ca '
        '([24.201.139.93]\n'
        ' helo=cthulhu.gerg.ca)' + short_lines)
    header = Header(hstr, maxlinelen=78, header_name='Received',
                    continuation_ws='\t')
    self.ndiffAssertEqual(header.encode(), expected)
813
def test_long_8bit_header(self):
    # Build an iso-8859-1 Subject from two chunks, then check the encoded
    # words produced by Header.encode() and by Message.as_string(), both
    # with a 76-column limit and with folding disabled (maxheaderlen=0).
    subject = Header('Britische Regierung gibt', 'iso-8859-1',
                     header_name='Subject')
    subject.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
    self.ndiffAssertEqual(subject.encode(maxlinelen=76), """\
=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
    msg = Message()
    msg['Subject'] = subject
    self.ndiffAssertEqual(msg.as_string(maxheaderlen=76), """\
Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
 =?iso-8859-1?q?hore-Windkraftprojekte?=

""")
    self.ndiffAssertEqual(msg.as_string(maxheaderlen=0), """\
Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=

""")
833
def test_long_8bit_header_no_charset(self):
    header_string = ('Britische Regierung gibt gr\xfcnes Licht '
                     'f\xfcr Offshore-Windkraftprojekte '
                     '<a-very-long-address@example.com>')
    # Assigned as a plain str, the non-ASCII value makes as_string() raise.
    plain = Message()
    plain['Reply-To'] = header_string
    self.assertRaises(UnicodeEncodeError, plain.as_string)
    # Wrapped in a utf-8 Header, the same text comes out as encoded words.
    wrapped = Message()
    wrapped['Reply-To'] = Header(header_string, 'utf-8',
                                 header_name='Reply-To')
    self.ndiffAssertEqual(wrapped.as_string(maxheaderlen=78), """\
Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=

""")
850
def test_long_to_header(self):
    # Five recipients, some separated by "," and some by ", "; the
    # flattened To header is expected to fold only where a space follows
    # the comma.
    recipients = ('"Someone Test #A" <someone@eecs.umich.edu>,'
                  '<someone@eecs.umich.edu>,'
                  '"Someone Test #B" <someone@umich.edu>, '
                  '"Someone Test #C" <someone@eecs.umich.edu>, '
                  '"Someone Test #D" <someone@eecs.umich.edu>')
    msg = Message()
    msg['To'] = recipients
    self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), '''\
To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
 "Someone Test #B" <someone@umich.edu>,
 "Someone Test #C" <someone@eecs.umich.edu>,
 "Someone Test #D" <someone@eecs.umich.edu>

''')
867
def test_long_line_after_append(self):
    # The first chunk nearly fills a 76-column line, so the appended chunk
    # is expected to land on a continuation line of its own.
    first = ('This is an example of string which has almost the limit of '
             'header length.')
    header = Header(first)
    header.append('Add another line.')
    self.ndiffAssertEqual(header.encode(maxlinelen=76),
                          first + '\n Add another line.')
876
def test_shorter_line_with_append(self):
    # Two short chunks are expected to be joined by a single space on one
    # line.
    header = Header('This is a shorter line.')
    header.append('Add another sentence. (Surprise?)')
    self.ndiffAssertEqual(
        header.encode(),
        'This is a shorter line. Add another sentence. (Surprise?)')
884
def test_long_field_name(self):
    # Encode a long iso-8859-1 value for a header whose field name is
    # itself long, and pin the resulting sequence of encoded words.
    field_name = 'X-Very-Very-Very-Long-Header-Name'
    german_text = ('Die Mieter treten hier ein werden mit einem Foerderband '
                   'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
                   'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
                   'bef\xf6rdert. ')
    header = Header(german_text, 'iso-8859-1', header_name=field_name)
    # BAW: this seems broken because the first line is too long
    self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000899
def test_long_received_header(self):
    # The same Received value is stored once as a Header instance and once
    # as a plain string; both are expected to flatten identically.
    received = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
                'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
                'Wed, 05 Mar 2003 18:10:18 -0700')
    msg = Message()
    msg['Received-1'] = Header(received, continuation_ws='\t')
    msg['Received-2'] = received
    flattened = msg.as_string(maxheaderlen=78)
    # This should be splitting on spaces not semicolons.
    self.ndiffAssertEqual(flattened, """\
Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
 Wed, 05 Mar 2003 18:10:18 -0700
Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
 Wed, 05 Mar 2003 18:10:18 -0700

""")
915
def test_string_headerinst_eq(self):
    # A value given as a Header instance and the same value given as a
    # plain string are expected to produce the same flattened text.
    value = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
             'tu-muenchen.de> (David Bremner\'s message of '
             '"Thu, 6 Mar 2003 13:58:21 +0100")')
    msg = Message()
    msg['Received-1'] = Header(value, header_name='Received-1',
                               continuation_ws='\t')
    msg['Received-2'] = value
    flattened = msg.as_string(maxheaderlen=78)
    # XXX This should be splitting on spaces not commas.
    self.ndiffAssertEqual(flattened, """\
Received-1: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner's message of \"Thu,
 6 Mar 2003 13:58:21 +0100\")
Received-2: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner's message of \"Thu,
 6 Mar 2003 13:58:21 +0100\")

""")
932
def test_long_unbreakable_lines_with_continuation(self):
    # Two base64-looking lines with no internal whitespace, the second one
    # already led by a space, stored once as a str and once as a Header.
    face = (
        'iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9\n'
        ' locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp')
    msg = Message()
    msg['Face-1'] = face
    msg['Face-2'] = Header(face, header_name='Face-2')
    flattened = msg.as_string(maxheaderlen=78)
    # XXX This splitting is all wrong.  The first value line should be
    # snug against the field name.
    self.ndiffAssertEqual(flattened, """\
Face-1:\x20
 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Face-2:\x20
 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp

""")
952
def test_another_long_multiline_header(self):
    # Parse a message consisting of one long Received header and check how
    # it is folded when the message is turned back into text.
    raw = ('Received: from siimage.com '
           '([172.25.1.3]) by zima.siliconimage.com with '
           'Microsoft SMTPSVC(5.0.2195.4905); '
           'Wed, 16 Oct 2002 07:41:11 -0700')
    parsed = email.message_from_string(raw)
    self.ndiffAssertEqual(parsed.as_string(maxheaderlen=78), '''\
Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with Microsoft SMTPSVC(5.0.2195.4905);
 Wed, 16 Oct 2002 07:41:11 -0700

''')
965
def test_long_lines_with_different_header(self):
    # The value itself begins with "List-Unsubscribe: " but is stored twice
    # under the field name "List": once as a str, once as a Header.
    value = ('List-Unsubscribe: '
             '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
             ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
             '?subject=unsubscribe>')
    msg = Message()
    msg['List'] = value
    msg['List'] = Header(value, header_name='List')
    self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
List: List-Unsubscribe: <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
List: List-Unsubscribe: <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>

""")
982
R. David Murray6f0022d2011-01-07 21:57:25 +0000983 def test_long_rfc2047_header_with_embedded_fws(self):
984 h = Header(textwrap.dedent("""\
985 We're going to pretend this header is in a non-ascii character set
986 \tto see if line wrapping with encoded words and embedded
987 folding white space works"""),
988 charset='utf-8',
989 header_name='Test')
990 self.assertEqual(h.encode()+'\n', textwrap.dedent("""\
991 =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
992 =?utf-8?q?cter_set?=
993 =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
994 =?utf-8?q?_folding_white_space_works?=""")+'\n')
995
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000996
Ezio Melottib3aedd42010-11-20 19:04:17 +0000997
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000998# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    def setUp(self):
        # A one-header message whose body's first line starts with "From ".
        message = Message()
        message['From'] = 'aaa@bbb.org'
        message.set_payload('From the desk of A.A.A.:\n'
                            'Blah blah blah\n')
        self.msg = message

    def test_mangled_from(self):
        # With mangle_from_ enabled the body line gains a leading ">".
        out = StringIO()
        Generator(out, mangle_from_=True).flatten(self.msg)
        self.assertEqual(out.getvalue(),
                         'From: aaa@bbb.org\n'
                         '\n'
                         '>From the desk of A.A.A.:\n'
                         'Blah blah blah\n')

    def test_dont_mangle_from(self):
        # With mangle_from_ disabled the body is written out untouched.
        out = StringIO()
        Generator(out, mangle_from_=False).flatten(self.msg)
        self.assertEqual(out.getvalue(),
                         'From: aaa@bbb.org\n'
                         '\n'
                         'From the desk of A.A.A.:\n'
                         'Blah blah blah\n')
1029
1030
Ezio Melottib3aedd42010-11-20 19:04:17 +00001031
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001032# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    def setUp(self):
        # Make sure we pick up the audiotest.au that lives in email/test/data.
        # In Python, there's an audiotest.au living in Lib/test but that isn't
        # included in some binary distros that don't include the test
        # package.  The trailing empty string on the .join() is significant
        # since findfile() will do a dirname().
        datadir = os.path.join(os.path.dirname(landmark), 'data', '')
        with open(findfile('audiotest.au', datadir), 'rb') as fp:
            self._audiodata = fp.read()
        self._au = MIMEAudio(self._audiodata)

    def test_guess_minor_type(self):
        # No subtype is passed to MIMEAudio in setUp(); it is expected to
        # report audio/basic for this file.
        self.assertEqual(self._au.get_content_type(), 'audio/basic')

    def test_encoding(self):
        # The payload is text; decoding it as base64 must give back the
        # bytes that were read from disk.
        payload = self._au.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._audiodata)

    def test_checkSetMinor(self):
        # An explicitly supplied subtype is used as given.
        au = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(au.get_content_type(), 'audio/fish')

    def test_add_header(self):
        eq = self.assertEqual
        # assertIs (rather than assertTrue(x is y)) so that a failure shows
        # what get_param() actually returned.
        same = self.assertIs
        self._au.add_header('Content-Disposition', 'attachment',
                            filename='audiotest.au')
        eq(self._au['content-disposition'],
           'attachment; filename="audiotest.au"')
        eq(self._au.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'audiotest.au')])
        eq(self._au.get_param('filename', header='content-disposition'),
           'audiotest.au')
        # A fresh list serves as a sentinel that can only come back by
        # identity.
        missing = []
        eq(self._au.get_param('attachment', header='content-disposition'), '')
        same(self._au.get_param('foo', failobj=missing,
                                header='content-disposition'), missing)
        # Try some missing stuff
        same(self._au.get_param('foobar', missing), missing)
        same(self._au.get_param('attachment', missing,
                                header='foobar'), missing)
1076
1077
Ezio Melottib3aedd42010-11-20 19:04:17 +00001078
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001079# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    def setUp(self):
        # Read the GIF fixture once per test and wrap it without naming a
        # subtype.
        with openfile('PyBanner048.gif', 'rb') as fp:
            self._imgdata = fp.read()
        self._im = MIMEImage(self._imgdata)

    def test_guess_minor_type(self):
        # No subtype is passed to MIMEImage in setUp(); it is expected to
        # report image/gif for this file.
        self.assertEqual(self._im.get_content_type(), 'image/gif')

    def test_encoding(self):
        # The payload is text; decoding it as base64 must give back the
        # bytes that were read from disk.
        payload = self._im.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._imgdata)

    def test_checkSetMinor(self):
        # An explicitly supplied subtype is used as given.
        im = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(im.get_content_type(), 'image/fish')

    def test_add_header(self):
        eq = self.assertEqual
        # assertIs (rather than assertTrue(x is y)) so that a failure shows
        # what get_param() actually returned.
        same = self.assertIs
        self._im.add_header('Content-Disposition', 'attachment',
                            filename='dingusfish.gif')
        eq(self._im['content-disposition'],
           'attachment; filename="dingusfish.gif"')
        eq(self._im.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'dingusfish.gif')])
        eq(self._im.get_param('filename', header='content-disposition'),
           'dingusfish.gif')
        # A fresh list serves as a sentinel that can only come back by
        # identity.
        missing = []
        eq(self._im.get_param('attachment', header='content-disposition'), '')
        same(self._im.get_param('foo', failobj=missing,
                                header='content-disposition'), missing)
        # Try some missing stuff
        same(self._im.get_param('foobar', missing), missing)
        same(self._im.get_param('attachment', missing,
                                header='foobar'), missing)
1117
1118
Ezio Melottib3aedd42010-11-20 19:04:17 +00001119
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001120# Test the basic MIMEApplication class
1121class TestMIMEApplication(unittest.TestCase):
1122 def test_headers(self):
1123 eq = self.assertEqual
Barry Warsaw8b2af272007-08-31 03:04:26 +00001124 msg = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001125 eq(msg.get_content_type(), 'application/octet-stream')
1126 eq(msg['content-transfer-encoding'], 'base64')
1127
1128 def test_body(self):
1129 eq = self.assertEqual
Barry Warsaw8c571042007-08-30 19:17:18 +00001130 bytes = b'\xfa\xfb\xfc\xfd\xfe\xff'
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001131 msg = MIMEApplication(bytes)
R. David Murray7da8f062010-06-04 16:11:08 +00001132 eq(msg.get_payload(), '+vv8/f7/')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001133 eq(msg.get_payload(decode=True), bytes)
1134
1135
Ezio Melottib3aedd42010-11-20 19:04:17 +00001136
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001137# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    def setUp(self):
        self._msg = MIMEText('hello there')

    def test_types(self):
        eq = self.assertEqual
        eq(self._msg.get_content_type(), 'text/plain')
        eq(self._msg.get_param('charset'), 'us-ascii')
        # A fresh list serves as a sentinel that can only come back by
        # identity; assertIs reports both objects if it does not.
        missing = []
        self.assertIs(self._msg.get_param('foobar', missing), missing)
        self.assertIs(self._msg.get_param('charset', missing,
                                          header='foobar'), missing)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there')
        eq(msg.get_charset(), 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # assertIn shows the flattened message when the text is absent.
        self.assertIn('hello there', msg.as_string())

    def test_utf8_input(self):
        # Eight Cyrillic letters, given as escapes to keep this file ASCII.
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        eq = self.assertEqual
        msg = MIMEText(teststr, _charset='utf-8')
        eq(msg.get_charset().output_charset, 'utf-8')
        eq(msg['content-type'], 'text/plain; charset="utf-8"')
        eq(msg.get_payload(decode=True), teststr.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
        "will fix in email6")
    def test_utf8_input_no_charset(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
1188
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001189
Ezio Melottib3aedd42010-11-20 19:04:17 +00001190
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001191# Test complicated multipart/* messages
class TestMultipart(TestEmailBase):
    def setUp(self):
        # Build a multipart/mixed container holding a short text part and a
        # GIF attachment; test_hierarchy() inspects it through self._msg,
        # self._txt and self._im.
        with openfile('PyBanner048.gif', 'rb') as fp:
            data = fp.read()
        container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
        image = MIMEImage(data, name='dingusfish.gif')
        image.add_header('content-disposition', 'attachment',
                         filename='dingusfish.gif')
        intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
        container.attach(intro)
        container.attach(image)
        container['From'] = 'Barry <barry@digicool.com>'
        container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
        container['Subject'] = 'Here is your dingus fish'
        # Let formatdate() render the fixed timestamp in local time instead
        # of assembling the UTC offset by hand: it gets the sign right for
        # zones east of UTC, handles offsets that are not whole hours, and
        # does not depend on the process locale for day and month names.
        container['Date'] = utils.formatdate(987809702.54848599,
                                             localtime=True)
        self._msg = container
        self._im = image
        self._txt = intro

    def test_hierarchy(self):
        # convenience
        eq = self.assertEqual
        same = self.assertIs
        raises = self.assertRaises
        # tests
        m = self._msg
        self.assertTrue(m.is_multipart())
        eq(m.get_content_type(), 'multipart/mixed')
        eq(len(m.get_payload()), 2)
        raises(IndexError, m.get_payload, 2)
        m0 = m.get_payload(0)
        m1 = m.get_payload(1)
        # The container must hand back the very objects attached in setUp().
        same(m0, self._txt)
        same(m1, self._im)
        eq(m.get_payload(), [m0, m1])
        self.assertFalse(m0.is_multipart())
        self.assertFalse(m1.is_multipart())

    def test_empty_multipart_idempotent(self):
        # Parsing and regenerating this message must reproduce it exactly.
        text = """\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY


--BOUNDARY--
"""
        msg = Parser().parsestr(text)
        self.ndiffAssertEqual(text, msg.as_string())

    def test_no_parts_in_a_multipart_with_none_epilogue(self):
        # Neither preamble nor epilogue is assigned here.
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY

--BOUNDARY--''')

    def test_no_parts_in_a_multipart_with_empty_epilogue(self):
        # Preamble and epilogue are both the empty string; the expected
        # text gains an extra blank line before the first boundary and a
        # newline after the closing one.
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = ''
        outer.epilogue = ''
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY

--BOUNDARY--
''')

    def test_one_part_in_a_multipart(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.set_boundary('BOUNDARY')
        msg = MIMEText('hello world')
        outer.attach(msg)
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_preamble(self):
        # preamble = '' yields an extra blank line before the first boundary.
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_preamble(self):
        # preamble = None yields no extra line before the first boundary.
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_epilogue(self):
        # epilogue = None: the text ends right after the closing boundary.
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
        # epilogue = '': a single newline follows the closing boundary.
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
        # epilogue = '\n': two newlines follow the closing boundary.
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = '\n'
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--

''')

    def test_message_external_body(self):
        # The second part of msg_36.txt is a multipart/alternative whose two
        # subparts are message/external-body, each wrapping one text/plain.
        eq = self.assertEqual
        msg = self._msgobj('msg_36.txt')
        eq(len(msg.get_payload()), 2)
        msg1 = msg.get_payload(1)
        eq(msg1.get_content_type(), 'multipart/alternative')
        eq(len(msg1.get_payload()), 2)
        for subpart in msg1.get_payload():
            eq(subpart.get_content_type(), 'message/external-body')
            eq(len(subpart.get_payload()), 1)
            subsubpart = subpart.get_payload(0)
            eq(subsubpart.get_content_type(), 'text/plain')

    def test_double_boundary(self):
        # msg_37.txt is a multipart that contains two dash-boundary's in a
        # row.  Our interpretation of RFC 2046 calls for ignoring the second
        # and subsequent boundaries.
        msg = self._msgobj('msg_37.txt')
        self.assertEqual(len(msg.get_payload()), 3)

    def test_nested_inner_contains_outer_boundary(self):
        eq = self.ndiffAssertEqual
        # msg_38.txt has an inner part that contains outer boundaries.  My
        # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
        # these are illegal and should be interpreted as unterminated inner
        # parts.
        msg = self._msgobj('msg_38.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): the nesting depth (indentation) of the expected
        # lines below was reconstructed -- confirm against the upstream file.
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
            text/plain
        text/plain
    text/plain
    text/plain
""")

    def test_nested_with_same_boundary(self):
        eq = self.ndiffAssertEqual
        # msg 39.txt is similarly evil in that it's got inner parts that use
        # the same boundary as outer parts.  Again, I believe the way this is
        # parsed is closest to the spirit of RFC 2046
        msg = self._msgobj('msg_39.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): the nesting depth (indentation) of the expected
        # lines below was reconstructed -- confirm against the upstream file.
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
        application/octet-stream
        application/octet-stream
    text/plain
""")

    def test_boundary_in_non_multipart(self):
        # A boundary parameter on a text/html part: the body, boundary
        # lines included, is expected to be regenerated unchanged.
        msg = self._msgobj('msg_40.txt')
        self.assertEqual(msg.as_string(), '''\
MIME-Version: 1.0
Content-Type: text/html; boundary="--961284236552522269"

----961284236552522269
Content-Type: text/html;
Content-Transfer-Encoding: 7Bit

<html></html>

----961284236552522269--
''')

    def test_boundary_with_leading_space(self):
        # The boundary value starts with whitespace, which must be kept.
        eq = self.assertEqual
        msg = email.message_from_string('''\
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary=" XXXX"

-- XXXX
Content-Type: text/plain


-- XXXX
Content-Type: text/plain

-- XXXX--
''')
        self.assertTrue(msg.is_multipart())
        eq(msg.get_boundary(), ' XXXX')
        eq(len(msg.get_payload()), 2)

    def test_boundary_without_trailing_newline(self):
        # The closing boundary is the last thing in the input, with no
        # newline after it; the part's payload must still be found.
        m = Parser().parsestr("""\
Content-Type: multipart/mixed; boundary="===============0012394164=="
MIME-Version: 1.0

--===============0012394164==
Content-Type: image/file1.jpg
MIME-Version: 1.0
Content-Transfer-Encoding: base64

YXNkZg==
--===============0012394164==--""")
        self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001563
1564
Ezio Melottib3aedd42010-11-20 19:04:17 +00001565
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001566# Test some badly formatted messages
1567class TestNonConformant(TestEmailBase):
def test_parse_missing_minor_type(self):
    # msg_14.txt must be reported as text/plain.
    # NOTE(review): going by the test name, the fixture presumably declares
    # a Content-Type without a subtype -- confirm against the data file.
    msg = self._msgobj('msg_14.txt')
    self.assertEqual(msg.get_content_type(), 'text/plain')
    self.assertEqual(msg.get_content_maintype(), 'text')
    self.assertEqual(msg.get_content_subtype(), 'plain')
1574
1575 def test_same_boundary_inner_outer(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001576 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001577 msg = self._msgobj('msg_15.txt')
1578 # XXX We can probably eventually do better
1579 inner = msg.get_payload(0)
1580 unless(hasattr(inner, 'defects'))
1581 self.assertEqual(len(inner.defects), 1)
1582 unless(isinstance(inner.defects[0],
1583 errors.StartBoundaryNotFoundDefect))
1584
1585 def test_multipart_no_boundary(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001586 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001587 msg = self._msgobj('msg_25.txt')
1588 unless(isinstance(msg.get_payload(), str))
1589 self.assertEqual(len(msg.defects), 2)
1590 unless(isinstance(msg.defects[0], errors.NoBoundaryInMultipartDefect))
1591 unless(isinstance(msg.defects[1],
1592 errors.MultipartInvariantViolationDefect))
1593
1594 def test_invalid_content_type(self):
1595 eq = self.assertEqual
1596 neq = self.ndiffAssertEqual
1597 msg = Message()
1598 # RFC 2045, $5.2 says invalid yields text/plain
1599 msg['Content-Type'] = 'text'
1600 eq(msg.get_content_maintype(), 'text')
1601 eq(msg.get_content_subtype(), 'plain')
1602 eq(msg.get_content_type(), 'text/plain')
1603 # Clear the old value and try something /really/ invalid
1604 del msg['content-type']
1605 msg['Content-Type'] = 'foo'
1606 eq(msg.get_content_maintype(), 'text')
1607 eq(msg.get_content_subtype(), 'plain')
1608 eq(msg.get_content_type(), 'text/plain')
1609 # Still, make sure that the message is idempotently generated
1610 s = StringIO()
1611 g = Generator(s)
1612 g.flatten(msg)
1613 neq(s.getvalue(), 'Content-Type: foo\n\n')
1614
1615 def test_no_start_boundary(self):
1616 eq = self.ndiffAssertEqual
1617 msg = self._msgobj('msg_31.txt')
1618 eq(msg.get_payload(), """\
1619--BOUNDARY
1620Content-Type: text/plain
1621
1622message 1
1623
1624--BOUNDARY
1625Content-Type: text/plain
1626
1627message 2
1628
1629--BOUNDARY--
1630""")
1631
1632 def test_no_separating_blank_line(self):
1633 eq = self.ndiffAssertEqual
1634 msg = self._msgobj('msg_35.txt')
1635 eq(msg.as_string(), """\
1636From: aperson@dom.ain
1637To: bperson@dom.ain
1638Subject: here's something interesting
1639
1640counter to RFC 2822, there's no separating newline here
1641""")
1642
1643 def test_lying_multipart(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001644 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001645 msg = self._msgobj('msg_41.txt')
1646 unless(hasattr(msg, 'defects'))
1647 self.assertEqual(len(msg.defects), 2)
1648 unless(isinstance(msg.defects[0], errors.NoBoundaryInMultipartDefect))
1649 unless(isinstance(msg.defects[1],
1650 errors.MultipartInvariantViolationDefect))
1651
1652 def test_missing_start_boundary(self):
1653 outer = self._msgobj('msg_42.txt')
1654 # The message structure is:
1655 #
1656 # multipart/mixed
1657 # text/plain
1658 # message/rfc822
1659 # multipart/mixed [*]
1660 #
1661 # [*] This message is missing its start boundary
1662 bad = outer.get_payload(1).get_payload(0)
1663 self.assertEqual(len(bad.defects), 1)
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001664 self.assertTrue(isinstance(bad.defects[0],
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001665 errors.StartBoundaryNotFoundDefect))
1666
1667 def test_first_line_is_continuation_header(self):
1668 eq = self.assertEqual
1669 m = ' Line 1\nLine 2\nLine 3'
1670 msg = email.message_from_string(m)
1671 eq(msg.keys(), [])
1672 eq(msg.get_payload(), 'Line 2\nLine 3')
1673 eq(len(msg.defects), 1)
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001674 self.assertTrue(isinstance(msg.defects[0],
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001675 errors.FirstHeaderLineIsContinuationDefect))
1676 eq(msg.defects[0].line, ' Line 1\n')
1677
1678
Ezio Melottib3aedd42010-11-20 19:04:17 +00001679
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001680# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    # Tests for decode_header()/make_header() on RFC 2047 encoded words.

    def test_rfc2047_multiline(self):
        # Two encoded words spread over a folded header line.
        raw = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
        chunks = decode_header(raw)
        self.assertEqual(chunks, [
            (b'Re:', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            (b'baz foo bar', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
        rebuilt = make_header(chunks)
        self.assertEqual(
            str(rebuilt),
            'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        self.ndiffAssertEqual(rebuilt.encode(maxlinelen=76), """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""")

    def test_whitespace_eater_unicode(self):
        raw = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        chunks = decode_header(raw)
        self.assertEqual(chunks, [(b'Andr\xe9', 'iso-8859-1'),
                                  (b'Pirard <pirard@dom.ain>', None)])
        as_text = str(make_header(chunks))
        self.assertEqual(as_text, 'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_eater_unicode_2(self):
        raw = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?='
        chunks = decode_header(raw)
        self.assertEqual(chunks, [
            (b'The', None),
            (b'quick brown fox', 'iso-8859-1'),
            (b'jumped over the', None),
            (b'lazy dog', 'iso-8859-1')])
        as_text = str(make_header(chunks))
        self.assertEqual(as_text,
                         'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        # Encoded words glued to surrounding text are returned undecoded.
        raw = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        self.assertEqual(decode_header(raw), [(raw, None)])

    def test_rfc2047_with_whitespace(self):
        raw = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        self.assertEqual(decode_header(raw), [
            (b'Sm', None),
            (b'\xf6', 'iso-8859-1'),
            (b'rg', None),
            (b'\xe5', 'iso-8859-1'),
            (b'sbord', None)])

    def test_rfc2047_B_bad_padding(self):
        template = '=?iso-8859-1?B?%s?='
        # only test complete bytes
        cases = [
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi'),
        ]
        for encoded, decoded in cases:
            result = decode_header(template % encoded)
            self.assertEqual(result, [(decoded, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004.
        raw = '=?iso-8659-1?Q?andr=e9=zz?='
        expected = [(b'andr\xe9=zz', 'iso-8659-1')]
        self.assertEqual(decode_header(raw), expected)
1744
Ezio Melottib3aedd42010-11-20 19:04:17 +00001745
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001746# Test the MIMEMessage class
class TestMIMEMessage(TestEmailBase):
    # Tests for MIMEMessage, message/* containers and multipart generation.

    def setUp(self):
        # Keep the raw text of msg_11.txt available on the instance.
        with openfile('msg_11.txt') as fp:
            self._text = fp.read()

    def test_type_error(self):
        # Passing a plain string instead of a message object must raise.
        self.assertRaises(TypeError, MIMEMessage, 'a plain string')

    def test_valid_argument(self):
        eq = self.assertEqual
        subject = 'A sub-message'
        m = Message()
        m['Subject'] = subject
        r = MIMEMessage(m)
        eq(r.get_content_type(), 'message/rfc822')
        payload = r.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subpart = payload[0]
        # The wrapped message object itself is stored, not a copy.
        self.assertIs(subpart, m)
        eq(subpart['subject'], subject)

    def test_bad_multipart(self):
        msg1 = Message()
        msg1['Subject'] = 'subpart 1'
        msg2 = Message()
        msg2['Subject'] = 'subpart 2'
        r = MIMEMessage(msg1)
        # Attaching a second message to the message/rfc822 container fails.
        self.assertRaises(errors.MultipartConversionError, r.attach, msg2)

    def test_generate(self):
        # First craft the message to be encapsulated
        m = Message()
        m['Subject'] = 'An enclosed message'
        m.set_payload('Here is the body of the message.\n')
        r = MIMEMessage(m)
        r['Subject'] = 'The enclosing message'
        s = StringIO()
        g = Generator(s)
        g.flatten(r)
        self.assertEqual(s.getvalue(), """\
Content-Type: message/rfc822
MIME-Version: 1.0
Subject: The enclosing message

Subject: An enclosed message

Here is the body of the message.
""")

    def test_parse_message_rfc822(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_11.txt')
        eq(msg.get_content_type(), 'message/rfc822')
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        submsg = payload[0]
        self.assertIsInstance(submsg, Message)
        eq(submsg['subject'], 'An enclosed message')
        eq(submsg.get_payload(), 'Here is the body of the message.\n')

    def test_dsn(self):
        eq = self.assertEqual
        # msg 16 is a Delivery Status Notification, see RFC 1894
        msg = self._msgobj('msg_16.txt')
        eq(msg.get_content_type(), 'multipart/report')
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 3)
        # Subpart 1 is a text/plain, human readable section
        subpart = msg.get_payload(0)
        eq(subpart.get_content_type(), 'text/plain')
        # NOTE(review): the leading whitespace of the indented lines in the
        # literal below must match msg_16.txt exactly -- confirm against
        # the data file.
        eq(subpart.get_payload(), """\
This report relates to a message you sent with the following header fields:

 Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
 Date: Sun, 23 Sep 2001 20:10:55 -0700
 From: "Ian T. Henry" <henryi@oxy.edu>
 To: SoCal Raves <scr@socal-raves.org>
 Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

 Recipient address: jangel1@cougar.noc.ucla.edu
 Reason: recipient reached disk quota

""")
        # Subpart 2 contains the machine parsable DSN information. It
        # consists of two blocks of headers, represented by two nested Message
        # objects.
        subpart = msg.get_payload(1)
        eq(subpart.get_content_type(), 'message/delivery-status')
        eq(len(subpart.get_payload()), 2)
        # message/delivery-status should treat each block as a bunch of
        # headers, i.e. a bunch of Message objects.
        dsn1 = subpart.get_payload(0)
        self.assertIsInstance(dsn1, Message)
        eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
        eq(dsn1.get_param('dns', header='reporting-mta'), '')
        # Try a missing one <wink>
        self.assertIsNone(dsn1.get_param('nsd', header='reporting-mta'))
        dsn2 = subpart.get_payload(1)
        self.assertIsInstance(dsn2, Message)
        eq(dsn2['action'], 'failed')
        eq(dsn2.get_params(header='original-recipient'),
           [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
        eq(dsn2.get_param('rfc822', header='final-recipient'), '')
        # Subpart 3 is the original message
        subpart = msg.get_payload(2)
        eq(subpart.get_content_type(), 'message/rfc822')
        payload = subpart.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subsubpart = payload[0]
        self.assertIsInstance(subsubpart, Message)
        eq(subsubpart.get_content_type(), 'text/plain')
        eq(subsubpart['message-id'],
           '<002001c144a6$8752e060$56104586@oxy.edu>')

    def test_epilogue(self):
        # A hand-built multipart with preamble and epilogue must flatten to
        # the exact text stored in msg_21.txt.
        eq = self.ndiffAssertEqual
        with openfile('msg_21.txt') as fp:
            text = fp.read()
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = 'End of MIME message\n'
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), text)

    def test_no_nl_preamble(self):
        # The preamble has no trailing newline and the epilogue is empty.
        eq = self.ndiffAssertEqual
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = ''
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
""")

    def test_default_type(self):
        # Both containers in msg_30.txt default to message/rfc822 and the
        # messages inside them default to text/plain.
        eq = self.assertEqual
        with openfile('msg_30.txt') as fp:
            msg = email.message_from_file(fp)
        container1 = msg.get_payload(0)
        eq(container1.get_default_type(), 'message/rfc822')
        eq(container1.get_content_type(), 'message/rfc822')
        container2 = msg.get_payload(1)
        eq(container2.get_default_type(), 'message/rfc822')
        eq(container2.get_content_type(), 'message/rfc822')
        container1a = container1.get_payload(0)
        eq(container1a.get_default_type(), 'text/plain')
        eq(container1a.get_content_type(), 'text/plain')
        container2a = container2.get_payload(0)
        eq(container2a.get_default_type(), 'text/plain')
        eq(container2a.get_content_type(), 'text/plain')

    def test_default_type_with_explicit_container_type(self):
        # Same expectations as test_default_type, for msg_28.txt.
        eq = self.assertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        container1 = msg.get_payload(0)
        eq(container1.get_default_type(), 'message/rfc822')
        eq(container1.get_content_type(), 'message/rfc822')
        container2 = msg.get_payload(1)
        eq(container2.get_default_type(), 'message/rfc822')
        eq(container2.get_content_type(), 'message/rfc822')
        container1a = container1.get_payload(0)
        eq(container1a.get_default_type(), 'text/plain')
        eq(container1a.get_content_type(), 'text/plain')
        container2a = container2.get_payload(0)
        eq(container2a.get_default_type(), 'text/plain')
        eq(container2a.get_content_type(), 'text/plain')

    def test_default_type_non_parsed(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # Set up container
        container = MIMEMultipart('digest', 'BOUNDARY')
        container.epilogue = ''
        # Set up subparts
        subpart1a = MIMEText('message 1\n')
        subpart2a = MIMEText('message 2\n')
        subpart1 = MIMEMessage(subpart1a)
        subpart2 = MIMEMessage(subpart2a)
        container.attach(subpart1)
        container.attach(subpart2)
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
        # With the explicit headers removed, the subparts must still report
        # message/rfc822 and flatten without their own header block.
        del subpart1['content-type']
        del subpart1['mime-version']
        del subpart2['content-type']
        del subpart2['mime-version']
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')

    def test_mime_attachments_in_constructor(self):
        # Parts passed as _subparts end up in the payload, in order.
        eq = self.assertEqual
        text1 = MIMEText('')
        text2 = MIMEText('')
        msg = MIMEMultipart(_subparts=(text1, text2))
        eq(len(msg.get_payload()), 2)
        eq(msg.get_payload(0), text1)
        eq(msg.get_payload(1), text2)

    def test_default_multipart_constructor(self):
        # A MIMEMultipart built with no arguments is already multipart.
        msg = MIMEMultipart()
        self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002045
Ezio Melottib3aedd42010-11-20 19:04:17 +00002046
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002047# A general test of parser->model->generator idempotency. IOW, read a message
2048# in, parse it into a message object tree, then without touching the tree,
2049# regenerate the plain text. The original text and the transformed text
2050# should be identical. Note: that we ignore the Unix-From since that may
2051# contain a changed date.
class TestIdempotent(TestEmailBase):
    # Round-trip tests: parse a data file, flatten the resulting message
    # and compare the output with the original text.

    # Line separator expected in parsed preambles, epilogues and payloads.
    linesep = '\n'

    def _msgobj(self, filename):
        # Return (parsed message, raw text) for the named data file.
        with openfile(filename) as fp:
            data = fp.read()
        msg = email.message_from_string(data)
        return msg, data

    def _idempotent(self, msg, text, unixfrom=False):
        # Flatten msg without header wrapping (maxheaderlen=0) and require
        # the output to equal text.
        eq = self.ndiffAssertEqual
        s = StringIO()
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg, unixfrom=unixfrom)
        eq(text, s.getvalue())

    def test_parse_text_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_01.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_params()[1], ('charset', 'us-ascii'))
        eq(msg.get_param('charset'), 'us-ascii')
        self.assertIsNone(msg.preamble)
        self.assertIsNone(msg.epilogue)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_03.txt')
        eq(msg.get_content_type(), 'text/plain')
        self.assertIsNone(msg.get_params())
        self.assertIsNone(msg.get_param('charset'))
        self._idempotent(msg, text)

    def test_simple_multipart(self):
        msg, text = self._msgobj('msg_04.txt')
        self._idempotent(msg, text)

    def test_MIME_digest(self):
        msg, text = self._msgobj('msg_02.txt')
        self._idempotent(msg, text)

    def test_long_header(self):
        msg, text = self._msgobj('msg_27.txt')
        self._idempotent(msg, text)

    def test_MIME_digest_with_part_headers(self):
        msg, text = self._msgobj('msg_28.txt')
        self._idempotent(msg, text)

    def test_mixed_with_image(self):
        msg, text = self._msgobj('msg_06.txt')
        self._idempotent(msg, text)

    def test_multipart_report(self):
        msg, text = self._msgobj('msg_05.txt')
        self._idempotent(msg, text)

    def test_dsn(self):
        msg, text = self._msgobj('msg_16.txt')
        self._idempotent(msg, text)

    def test_preamble_epilogue(self):
        msg, text = self._msgobj('msg_21.txt')
        self._idempotent(msg, text)

    def test_multipart_one_part(self):
        msg, text = self._msgobj('msg_23.txt')
        self._idempotent(msg, text)

    def test_multipart_no_parts(self):
        msg, text = self._msgobj('msg_24.txt')
        self._idempotent(msg, text)

    def test_no_start_boundary(self):
        msg, text = self._msgobj('msg_31.txt')
        self._idempotent(msg, text)

    def test_rfc2231_charset(self):
        msg, text = self._msgobj('msg_32.txt')
        self._idempotent(msg, text)

    def test_more_rfc2231_parameters(self):
        msg, text = self._msgobj('msg_33.txt')
        self._idempotent(msg, text)

    def test_text_plain_in_a_multipart_digest(self):
        msg, text = self._msgobj('msg_34.txt')
        self._idempotent(msg, text)

    def test_nested_multipart_mixeds(self):
        msg, text = self._msgobj('msg_12a.txt')
        self._idempotent(msg, text)

    def test_message_external_body_idempotent(self):
        msg, text = self._msgobj('msg_36.txt')
        self._idempotent(msg, text)

    def test_message_delivery_status(self):
        # This is the only round trip that also emits the Unix From line.
        msg, text = self._msgobj('msg_43.txt')
        self._idempotent(msg, text, unixfrom=True)

    def test_message_signed_idempotent(self):
        msg, text = self._msgobj('msg_45.txt')
        self._idempotent(msg, text)

    def test_content_type(self):
        eq = self.assertEqual
        # Parse the data file; the raw text is not needed here.
        msg, text = self._msgobj('msg_05.txt')
        eq(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(msg.get_params())
        eq(params['report-type'], 'delivery-status')
        eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep)
        eq(msg.epilogue, self.linesep)
        eq(len(msg.get_payload()), 3)
        # Make sure the subparts are what we expect
        msg1 = msg.get_payload(0)
        eq(msg1.get_content_type(), 'text/plain')
        eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg2 = msg.get_payload(1)
        eq(msg2.get_content_type(), 'text/plain')
        eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg3 = msg.get_payload(2)
        eq(msg3.get_content_type(), 'message/rfc822')
        self.assertIsInstance(msg3, Message)
        payload = msg3.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg4 = payload[0]
        self.assertIsInstance(msg4, Message)
        eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep)

    def test_parser(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        eq(msg.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and that
        # that submessage has a type of text/plain
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg1 = payload[0]
        self.assertIsInstance(msg1, Message)
        eq(msg1.get_content_type(), 'text/plain')
        self.assertIsInstance(msg1.get_payload(), str)
        eq(msg1.get_payload(), self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002209
2210
Ezio Melottib3aedd42010-11-20 19:04:17 +00002211
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002212# Test various other bits of the package's functionality
2213class TestMiscellaneous(TestEmailBase):
def test_message_from_string(self):
    # Parse msg_01.txt from a string and flatten it again; the output
    # must equal the input.
    with openfile('msg_01.txt') as fp:
        original = fp.read()
    parsed = email.message_from_string(original)
    out = StringIO()
    # Don't wrap/continue long headers since we're trying to test
    # idempotency.
    Generator(out, maxheaderlen=0).flatten(parsed)
    self.assertEqual(original, out.getvalue())
2224
def test_message_from_file(self):
    # Same round trip as the string variant, but the message is parsed
    # from the open file after rewinding it.
    with openfile('msg_01.txt') as fp:
        original = fp.read()
        fp.seek(0)
        parsed = email.message_from_file(fp)
    out = StringIO()
    # Don't wrap/continue long headers since we're trying to test
    # idempotency.
    Generator(out, maxheaderlen=0).flatten(parsed)
    self.assertEqual(original, out.getvalue())
2236
def test_message_from_string_with_class(self):
    # message_from_string() must build the message, and every subpart,
    # from the class passed as its second argument.
    with openfile('msg_01.txt') as fp:
        text = fp.read()

    # Create a subclass
    class MyMessage(Message):
        pass

    msg = email.message_from_string(text, MyMessage)
    self.assertIsInstance(msg, MyMessage)
    # Try something more complicated
    with openfile('msg_02.txt') as fp:
        text = fp.read()
    msg = email.message_from_string(text, MyMessage)
    for subpart in msg.walk():
        self.assertIsInstance(subpart, MyMessage)
2254
def test_message_from_file_with_class(self):
    # message_from_file() must build the message, and every subpart,
    # from the class passed as its second argument.

    # Create a subclass
    class MyMessage(Message):
        pass

    with openfile('msg_01.txt') as fp:
        msg = email.message_from_file(fp, MyMessage)
    self.assertIsInstance(msg, MyMessage)
    # Try something more complicated
    with openfile('msg_02.txt') as fp:
        msg = email.message_from_file(fp, MyMessage)
    for subpart in msg.walk():
        self.assertIsInstance(subpart, MyMessage)
2269
def test__all__(self):
    # email.__all__ must contain exactly these names; order is ignored.
    module = __import__('email')
    # sorted() returns a new list, so module.__all__ is left untouched
    # and no local shadows the builtin all().
    self.assertEqual(sorted(module.__all__), [
        'base64mime', 'charset', 'encoders', 'errors', 'generator',
        'header', 'iterators', 'message', 'message_from_binary_file',
        'message_from_bytes', 'message_from_file',
        'message_from_string', 'mime', 'parser',
        'quoprimime', 'utils',
        ])
2282
2283 def test_formatdate(self):
2284 now = time.time()
2285 self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
2286 time.gmtime(now)[:6])
2287
2288 def test_formatdate_localtime(self):
2289 now = time.time()
2290 self.assertEqual(
2291 utils.parsedate(utils.formatdate(now, localtime=True))[:6],
2292 time.localtime(now)[:6])
2293
2294 def test_formatdate_usegmt(self):
2295 now = time.time()
2296 self.assertEqual(
2297 utils.formatdate(now, localtime=False),
2298 time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
2299 self.assertEqual(
2300 utils.formatdate(now, localtime=False, usegmt=True),
2301 time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
2302
2303 def test_parsedate_none(self):
2304 self.assertEqual(utils.parsedate(''), None)
2305
2306 def test_parsedate_compact(self):
2307 # The FWS after the comma is optional
2308 self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
2309 utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
2310
2311 def test_parsedate_no_dayofweek(self):
2312 eq = self.assertEqual
2313 eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
2314 (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
2315
2316 def test_parsedate_compact_no_dayofweek(self):
2317 eq = self.assertEqual
2318 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
2319 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2320
R. David Murray4a62e892010-12-23 20:35:46 +00002321 def test_parsedate_no_space_before_positive_offset(self):
2322 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
2323 (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
2324
2325 def test_parsedate_no_space_before_negative_offset(self):
2326 # Issue 1155362: we already handled '+' for this case.
2327 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
2328 (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))
2329
2330
R David Murrayaccd1c02011-03-13 20:06:23 -04002331 def test_parsedate_accepts_time_with_dots(self):
2332 eq = self.assertEqual
2333 eq(utils.parsedate_tz('5 Feb 2003 13.47.26 -0800'),
2334 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2335 eq(utils.parsedate_tz('5 Feb 2003 13.47 -0800'),
2336 (2003, 2, 5, 13, 47, 0, 0, 1, -1, -28800))
2337
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002338 def test_parsedate_acceptable_to_time_functions(self):
2339 eq = self.assertEqual
2340 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
2341 t = int(time.mktime(timetup))
2342 eq(time.localtime(t)[:6], timetup[:6])
2343 eq(int(time.strftime('%Y', timetup)), 2003)
2344 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
2345 t = int(time.mktime(timetup[:9]))
2346 eq(time.localtime(t)[:6], timetup[:6])
2347 eq(int(time.strftime('%Y', timetup[:9])), 2003)
2348
R. David Murray219d1c82010-08-25 00:45:55 +00002349 def test_parsedate_y2k(self):
2350 """Test for parsing a date with a two-digit year.
2351
2352 Parsing a date with a two-digit year should return the correct
2353 four-digit year. RFC822 allows two-digit years, but RFC2822 (which
2354 obsoletes RFC822) requires four-digit years.
2355
2356 """
2357 self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
2358 utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
2359 self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
2360 utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
2361
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002362 def test_parseaddr_empty(self):
2363 self.assertEqual(utils.parseaddr('<>'), ('', ''))
2364 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
2365
2366 def test_noquote_dump(self):
2367 self.assertEqual(
2368 utils.formataddr(('A Silly Person', 'person@dom.ain')),
2369 'A Silly Person <person@dom.ain>')
2370
2371 def test_escape_dump(self):
2372 self.assertEqual(
2373 utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
2374 r'"A \(Very\) Silly Person" <person@dom.ain>')
2375 a = r'A \(Special\) Person'
2376 b = 'person@dom.ain'
2377 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2378
2379 def test_escape_backslashes(self):
2380 self.assertEqual(
2381 utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
2382 r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
2383 a = r'Arthur \Backslash\ Foobar'
2384 b = 'person@dom.ain'
2385 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2386
2387 def test_name_with_dot(self):
2388 x = 'John X. Doe <jxd@example.com>'
2389 y = '"John X. Doe" <jxd@example.com>'
2390 a, b = ('John X. Doe', 'jxd@example.com')
2391 self.assertEqual(utils.parseaddr(x), (a, b))
2392 self.assertEqual(utils.parseaddr(y), (a, b))
2393 # formataddr() quotes the name if there's a dot in it
2394 self.assertEqual(utils.formataddr((a, b)), y)
2395
R. David Murray5397e862010-10-02 15:58:26 +00002396 def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
2397 # issue 10005. Note that in the third test the second pair of
2398 # backslashes is not actually a quoted pair because it is not inside a
2399 # comment or quoted string: the address being parsed has a quoted
2400 # string containing a quoted backslash, followed by 'example' and two
2401 # backslashes, followed by another quoted string containing a space and
2402 # the word 'example'. parseaddr copies those two backslashes
2403 # literally. Per rfc5322 this is not technically correct since a \ may
2404 # not appear in an address outside of a quoted string. It is probably
2405 # a sensible Postel interpretation, though.
2406 eq = self.assertEqual
2407 eq(utils.parseaddr('""example" example"@example.com'),
2408 ('', '""example" example"@example.com'))
2409 eq(utils.parseaddr('"\\"example\\" example"@example.com'),
2410 ('', '"\\"example\\" example"@example.com'))
2411 eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
2412 ('', '"\\\\"example\\\\" example"@example.com'))
2413
def test_parseaddr_preserves_spaces_in_local_part(self):
    # issue 9286.  A normal RFC5322 local part should not contain any
    # folding white space, but legacy local parts can (they are a sequence
    # of atoms, not dotatoms).  On the other hand we strip whitespace from
    # before the @ and around dots, on the assumption that the whitespace
    # around the punctuation is a mistake in what would otherwise be
    # an RFC5322 local part.  Leading whitespace is, as usual, stripped as
    # well.
    #
    # Each assertion compares the expected (realname, address) pair (first
    # argument) with what parseaddr() returns for the raw input.
    self.assertEqual(('', "merwok wok@xample.com"),
        utils.parseaddr("merwok wok@xample.com"))
    # NOTE(review): as shown here this assertion is identical to the one
    # above; the intended input may have contained repeated internal
    # whitespace -- confirm against the canonical test data.
    self.assertEqual(('', "merwok wok@xample.com"),
        utils.parseaddr("merwok wok@xample.com"))
    # Whitespace before the local part and before the '@' is dropped.
    self.assertEqual(('', "merwok wok@xample.com"),
        utils.parseaddr(" merwok wok @xample.com"))
    # A quoted string inside the local part is kept verbatim.
    self.assertEqual(('', 'merwok"wok" wok@xample.com'),
        utils.parseaddr('merwok"wok" wok@xample.com'))
    # Whitespace around the dots is removed.
    self.assertEqual(('', 'merwok.wok.wok@xample.com'),
        utils.parseaddr('merwok. wok . wok@xample.com'))
2431
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002432 def test_multiline_from_comment(self):
2433 x = """\
2434Foo
2435\tBar <foo@example.com>"""
2436 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
2437
2438 def test_quote_dump(self):
2439 self.assertEqual(
2440 utils.formataddr(('A Silly; Person', 'person@dom.ain')),
2441 r'"A Silly; Person" <person@dom.ain>')
2442
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002443 def test_charset_richcomparisons(self):
2444 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002445 ne = self.assertNotEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002446 cset1 = Charset()
2447 cset2 = Charset()
2448 eq(cset1, 'us-ascii')
2449 eq(cset1, 'US-ASCII')
2450 eq(cset1, 'Us-AsCiI')
2451 eq('us-ascii', cset1)
2452 eq('US-ASCII', cset1)
2453 eq('Us-AsCiI', cset1)
2454 ne(cset1, 'usascii')
2455 ne(cset1, 'USASCII')
2456 ne(cset1, 'UsAsCiI')
2457 ne('usascii', cset1)
2458 ne('USASCII', cset1)
2459 ne('UsAsCiI', cset1)
2460 eq(cset1, cset2)
2461 eq(cset2, cset1)
2462
2463 def test_getaddresses(self):
2464 eq = self.assertEqual
2465 eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
2466 'Bud Person <bperson@dom.ain>']),
2467 [('Al Person', 'aperson@dom.ain'),
2468 ('Bud Person', 'bperson@dom.ain')])
2469
2470 def test_getaddresses_nasty(self):
2471 eq = self.assertEqual
2472 eq(utils.getaddresses(['foo: ;']), [('', '')])
2473 eq(utils.getaddresses(
2474 ['[]*-- =~$']),
2475 [('', ''), ('', ''), ('', '*--')])
2476 eq(utils.getaddresses(
2477 ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
2478 [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
2479
2480 def test_getaddresses_embedded_comment(self):
2481 """Test proper handling of a nested comment"""
2482 eq = self.assertEqual
2483 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
2484 eq(addrs[0][1], 'foo@bar.com')
2485
2486 def test_utils_quote_unquote(self):
2487 eq = self.assertEqual
2488 msg = Message()
2489 msg.add_header('content-disposition', 'attachment',
2490 filename='foo\\wacky"name')
2491 eq(msg.get_filename(), 'foo\\wacky"name')
2492
2493 def test_get_body_encoding_with_bogus_charset(self):
2494 charset = Charset('not a charset')
2495 self.assertEqual(charset.get_body_encoding(), 'base64')
2496
2497 def test_get_body_encoding_with_uppercase_charset(self):
2498 eq = self.assertEqual
2499 msg = Message()
2500 msg['Content-Type'] = 'text/plain; charset=UTF-8'
2501 eq(msg['content-type'], 'text/plain; charset=UTF-8')
2502 charsets = msg.get_charsets()
2503 eq(len(charsets), 1)
2504 eq(charsets[0], 'utf-8')
2505 charset = Charset(charsets[0])
2506 eq(charset.get_body_encoding(), 'base64')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00002507 msg.set_payload(b'hello world', charset=charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002508 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
2509 eq(msg.get_payload(decode=True), b'hello world')
2510 eq(msg['content-transfer-encoding'], 'base64')
2511 # Try another one
2512 msg = Message()
2513 msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
2514 charsets = msg.get_charsets()
2515 eq(len(charsets), 1)
2516 eq(charsets[0], 'us-ascii')
2517 charset = Charset(charsets[0])
2518 eq(charset.get_body_encoding(), encoders.encode_7or8bit)
2519 msg.set_payload('hello world', charset=charset)
2520 eq(msg.get_payload(), 'hello world')
2521 eq(msg['content-transfer-encoding'], '7bit')
2522
2523 def test_charsets_case_insensitive(self):
2524 lc = Charset('us-ascii')
2525 uc = Charset('US-ASCII')
2526 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
2527
2528 def test_partial_falls_inside_message_delivery_status(self):
2529 eq = self.ndiffAssertEqual
2530 # The Parser interface provides chunks of data to FeedParser in 8192
2531 # byte gulps. SF bug #1076485 found one of those chunks inside
2532 # message/delivery-status header block, which triggered an
2533 # unreadline() of NeedMoreData.
2534 msg = self._msgobj('msg_43.txt')
2535 sfp = StringIO()
2536 iterators._structure(msg, sfp)
2537 eq(sfp.getvalue(), """\
2538multipart/report
2539 text/plain
2540 message/delivery-status
2541 text/plain
2542 text/plain
2543 text/plain
2544 text/plain
2545 text/plain
2546 text/plain
2547 text/plain
2548 text/plain
2549 text/plain
2550 text/plain
2551 text/plain
2552 text/plain
2553 text/plain
2554 text/plain
2555 text/plain
2556 text/plain
2557 text/plain
2558 text/plain
2559 text/plain
2560 text/plain
2561 text/plain
2562 text/plain
2563 text/plain
2564 text/plain
2565 text/plain
2566 text/plain
2567 text/rfc822-headers
2568""")
2569
R. David Murraya0b44b52010-12-02 21:47:19 +00002570 def test_make_msgid_domain(self):
2571 self.assertEqual(
2572 email.utils.make_msgid(domain='testdomain-string')[-19:],
2573 '@testdomain-string>')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002574
Ezio Melottib3aedd42010-11-20 19:04:17 +00002575
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002576# Test the iterator/generators
class TestIterators(TestEmailBase):
    """Tests for the email.iterators helpers and feed-parser buffering."""

    def test_body_line_iterator(self):
        # body_line_iterator() must yield every payload line, for a simple
        # message as well as for a multipart one.
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # First a simple non-multipart message
        msg = self._msgobj('msg_01.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 6)
        neq(EMPTYSTRING.join(lines), msg.get_payload())
        # Now a more complicated multipart
        msg = self._msgobj('msg_02.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 43)
        with openfile('msg_19.txt') as fp:
            neq(EMPTYSTRING.join(lines), fp.read())

    def test_typed_subpart_iterator(self):
        # Selecting on the main type only ('text') finds two subparts.
        eq = self.assertEqual
        msg = self._msgobj('msg_04.txt')
        payloads = [
            subpart.get_payload()
            for subpart in iterators.typed_subpart_iterator(msg, 'text')
        ]
        eq(len(payloads), 2)
        eq(EMPTYSTRING.join(payloads), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        # Selecting on main type and subtype ('text', 'plain') finds the
        # single part of msg_03.txt.
        eq = self.assertEqual
        msg = self._msgobj('msg_03.txt')
        payloads = [
            subpart.get_payload()
            for subpart in iterators.typed_subpart_iterator(
                msg, 'text', 'plain')
        ]
        eq(len(payloads), 1)
        eq(EMPTYSTRING.join(payloads), """\

Hi,

Do you like this message?

-Me
""")

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
        line endings. A CR ending one push() followed by a LF starting
        the next push() added an empty line.
        '''
        # Each entry pairs a chunk to push with the number of lines that
        # must become readable right after that push.
        imt = [
            ("a\r \n", 2),
            ("b", 0),
            ("c\n", 1),
            ("", 0),
            ("d\r\n", 1),
            ("e\r", 0),
            ("\nf", 1),
            ("\r\n", 1),
        ]
        from email.feedparser import BufferedSubFile, NeedMoreData
        bsf = BufferedSubFile()
        om = []
        nt = 0
        for il, n in imt:
            bsf.push(il)
            nt += n
            n1 = 0
            # Drain every line that is currently complete.
            while True:
                ol = bsf.readline()
                if ol is NeedMoreData:
                    break
                om.append(ol)
                n1 += 1
            # assertEqual reports both values when the counts disagree.
            self.assertEqual(n, n1)
        self.assertEqual(len(om), nt)
        # The lines read back must reassemble to exactly what was pushed.
        self.assertEqual(''.join(il for il, n in imt), ''.join(om))
2663
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002664
Ezio Melottib3aedd42010-11-20 19:04:17 +00002665
class TestParsers(TestEmailBase):
    """Tests for Parser, HeaderParser and the message_from_* helpers."""

    def test_header_parser(self):
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt') as fp:
            msg = HeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        # The body is left unparsed, so the payload is one string.
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)

    def test_whitespace_continuation(self):
        eq = self.assertEqual
        # This message contains a line after the Subject: header that has only
        # whitespace, but it is not empty!
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: the next line has a space on it
\x20
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        eq = self.assertEqual
        # Like the previous test, but the subject line is the last
        # header.
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam
Subject: the next line has a space on it
\x20

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        eq = self.assertEqual
        # newline='\n' keeps the file's CRLF line endings intact.
        with openfile('msg_26.txt', newline='\n') as fp:
            msg = Parser().parse(fp)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'text/plain')
        eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'application/riscos')

    def test_crlf_flatten(self):
        # Using newline='\n' preserves the crlfs in this input file.
        with openfile('msg_26.txt', newline='\n') as fp:
            text = fp.read()
        msg = email.message_from_string(text)
        s = StringIO()
        g = Generator(s)
        g.flatten(msg, linesep='\r\n')
        self.assertEqual(s.getvalue(), text)

    # unittest setting: do not truncate diffs in failure messages.
    maxDiff = None

    def test_multipart_digest_with_extra_mime_headers(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        eq(msg.is_multipart(), 1)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'message/rfc822')
        eq(part1.is_multipart(), 1)
        eq(len(part1.get_payload()), 1)
        part1a = part1.get_payload(0)
        eq(part1a.is_multipart(), 0)
        eq(part1a.get_content_type(), 'text/plain')
        neq(part1a.get_payload(), 'message 1\n')
        # next message/rfc822
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'message/rfc822')
        eq(part2.is_multipart(), 1)
        eq(len(part2.get_payload()), 1)
        part2a = part2.get_payload(0)
        eq(part2a.is_multipart(), 0)
        eq(part2a.get_content_type(), 'text/plain')
        neq(part2a.get_payload(), 'message 2\n')

    def test_three_lines(self):
        # A bug report by Andrew McNamara
        # The From: header is missing its closing '>' and the text has no
        # trailing newline; the last header must still be readable.
        lines = ['From: Andrew Person <aperson@dom.ain',
                 'Subject: Test',
                 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
        msg = email.message_from_string(NL.join(lines))
        self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        eq = self.assertEqual
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        value1 = 'text'
        value2 = 'more text'
        m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            value1, value2)
        msg = email.message_from_string(m)
        eq(msg.get('Header'), value1)
        eq(msg.get('Next-Header'), value2)

    def test_rfc2822_header_syntax(self):
        # Header names made of unusual printable characters are accepted.
        eq = self.assertEqual
        m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg), 3)
        eq(sorted(field for field in msg), ['!"#QUX;~', '>From', 'From'])
        eq(msg.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        # The first line contains a space before any colon, so it is not a
        # header; the message must end up with no headers at all.
        eq = self.assertEqual
        m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg.keys()), 0)

    def test_rfc2822_one_character_header(self):
        eq = self.assertEqual
        m = 'A: first header\nB: second header\nCC: third header\n\nbody'
        msg = email.message_from_string(m)
        # sorted() works for any iterable of names, unlike an in-place
        # .sort() which needs keys() to return a list.
        headers = sorted(msg.keys())
        eq(headers, ['A', 'B', 'CC'])
        eq(msg.get_payload(), 'body')

    def test_CRLFLF_at_end_of_part(self):
        # issue 5610: feedparser should not eat two chars from body part ending
        # with "\r\n\n".
        m = (
            "From: foo@bar.com\n"
            "To: baz\n"
            "Mime-Version: 1.0\n"
            "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
            "\n"
            "--BOUNDARY\n"
            "Content-Type: text/plain\n"
            "\n"
            "body ending with CRLF newline\r\n"
            "\n"
            "--BOUNDARY--\n"
        )
        msg = email.message_from_string(m)
        self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002828
Ezio Melottib3aedd42010-11-20 19:04:17 +00002829
class Test8BitBytesHandling(unittest.TestCase):
    # In Python3 all input is string, but that doesn't work if the actual input
    # uses an 8bit transfer encoding.  To hack around that, in email 5.1 we
    # decode byte streams using the surrogateescape error handler, and
    # reconvert to binary at appropriate places if we detect surrogates.  This
    # doesn't allow us to transform headers with 8bit bytes (they get munged),
    # but it does allow us to parse and preserve them, and to decode body
    # parts that use an 8bit CTE.

    # Template for a single-part text message; each body test fills in the
    # charset, the content transfer encoding and one body line.
    bodytest_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: baz
        Mime-Version: 1.0
        Content-Type: text/plain; charset={charset}
        Content-Transfer-Encoding: {cte}

        {bodyline}
        """)

    def test_known_8bit_CTE(self):
        # 8bit body with a known charset: the text payload is decoded and
        # the decode=True payload is the original utf-8 bytes.
        m = self.bodytest_msg.format(charset='utf-8',
                                     cte='8bit',
                                     bodyline='pöstal').encode('utf-8')
        msg = email.message_from_bytes(m)
        self.assertEqual(msg.get_payload(), "pöstal\n")
        self.assertEqual(msg.get_payload(decode=True),
                         "pöstal\n".encode('utf-8'))

    def test_unknown_8bit_CTE(self):
        # Unknown charset: each non-ASCII byte shows up as U+FFFD in the
        # text payload, while decode=True still returns the original bytes.
        m = self.bodytest_msg.format(charset='notavalidcharset',
                                     cte='8bit',
                                     bodyline='pöstal').encode('utf-8')
        msg = email.message_from_bytes(m)
        self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n")
        self.assertEqual(msg.get_payload(decode=True),
                         "pöstal\n".encode('utf-8'))

    def test_8bit_in_quopri_body(self):
        # This is non-RFC compliant data...without 'decode' the library code
        # decodes the body using the charset from the headers, and because the
        # source byte really is utf-8 this works.  This is likely to fail
        # against real dirty data (ie: produce mojibake), but the data is
        # invalid anyway so it is as good a guess as any.  But this means that
        # this test just confirms the current behavior; that behavior is not
        # necessarily the best possible behavior.  With 'decode' it is
        # returning the raw bytes, so that test should be of correct behavior,
        # or at least produce the same result that email4 did.
        m = self.bodytest_msg.format(charset='utf-8',
                                     cte='quoted-printable',
                                     bodyline='p=C3=B6stál').encode('utf-8')
        msg = email.message_from_bytes(m)
        self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
        self.assertEqual(msg.get_payload(decode=True),
                         'pöstál\n'.encode('utf-8'))

    def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
        # This is similar to the previous test, but proves that if the 8bit
        # byte is undecodeable in the specified charset, it gets replaced
        # by the unicode 'unknown' character.  Again, this may or may not
        # be the ideal behavior.  Note that if decode=False none of the
        # decoders will get involved, so this is the only test we need
        # for this behavior.
        m = self.bodytest_msg.format(charset='ascii',
                                     cte='quoted-printable',
                                     bodyline='p=C3=B6stál').encode('utf-8')
        msg = email.message_from_bytes(m)
        self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
        self.assertEqual(msg.get_payload(decode=True),
                         'pöstál\n'.encode('utf-8'))

    def test_8bit_in_base64_body(self):
        # Sticking an 8bit byte in a base64 block makes it undecodable by
        # normal means, so the block is returned undecoded, but as bytes.
        m = self.bodytest_msg.format(charset='utf-8',
                                     cte='base64',
                                     bodyline='cMO2c3RhbAá=').encode('utf-8')
        msg = email.message_from_bytes(m)
        self.assertEqual(msg.get_payload(decode=True),
                         'cMO2c3RhbAá=\n'.encode('utf-8'))

    def test_8bit_in_uuencode_body(self):
        # Sticking an 8bit byte in a uuencode block makes it undecodable by
        # normal means, so the block is returned undecoded, but as bytes.
        m = self.bodytest_msg.format(charset='utf-8',
                                     cte='uuencode',
                                     bodyline='<,.V<W1A; á ').encode('utf-8')
        msg = email.message_from_bytes(m)
        self.assertEqual(msg.get_payload(decode=True),
                         '<,.V<W1A; á \n'.encode('utf-8'))


    # Pairs of (raw header text as it appears in the input message,
    # (header name, value expected when the message is rendered via str())).
    headertest_headers = (
        ('From: foo@bar.com', ('From', 'foo@bar.com')),
        ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
        ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
            '\tJean de Baddie',
            ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
                        'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
                        ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
        ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
        )
    # The raw headers joined into one utf-8 encoded message whose last line
    # is 'Yes, they are flying.'.
    headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) +
        '\nYes, they are flying.\n').encode('utf-8')

    def test_get_8bit_header(self):
        # Both get() and item access return the value with each non-ASCII
        # byte replaced by U+FFFD once converted with str().
        msg = email.message_from_bytes(self.headertest_msg)
        self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz')
        self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz')

    def test_print_8bit_headers(self):
        # str(msg) must show every header in its expected encoded form,
        # taken from the second element of each headertest_headers pair.
        msg = email.message_from_bytes(self.headertest_msg)
        self.assertEqual(str(msg),
                         textwrap.dedent("""\
                            From: {}
                            To: {}
                            Subject: {}
                            From: {}

                            Yes, they are flying.
                            """).format(*[expected[1] for (_, expected) in
                                        self.headertest_headers]))

    def test_values_with_8bit_headers(self):
        # values() yields the headers in message order.
        msg = email.message_from_bytes(self.headertest_msg)
        self.assertListEqual([str(x) for x in msg.values()],
                              ['foo@bar.com',
                               'b\uFFFD\uFFFDz',
                               'Maintenant je vous pr\uFFFD\uFFFDsente mon '
                                   'coll\uFFFD\uFFFDgue, le pouf '
                                   'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
                                   '\tJean de Baddie',
                               "g\uFFFD\uFFFDst"])

    def test_items_with_8bit_headers(self):
        # items() yields (name, value) pairs in message order.
        msg = email.message_from_bytes(self.headertest_msg)
        self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()],
                              [('From', 'foo@bar.com'),
                               ('To', 'b\uFFFD\uFFFDz'),
                               ('Subject', 'Maintenant je vous '
                                  'pr\uFFFD\uFFFDsente '
                                  'mon coll\uFFFD\uFFFDgue, le pouf '
                                  'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
                                  '\tJean de Baddie'),
                               ('From', 'g\uFFFD\uFFFDst')])

    def test_get_all_with_8bit_headers(self):
        # get_all() returns both From: headers, in order.
        msg = email.message_from_bytes(self.headertest_msg)
        self.assertListEqual([str(x) for x in msg.get_all('from')],
                              ['foo@bar.com',
                               'g\uFFFD\uFFFDst'])

    # utf-8 encoded message with non-ASCII headers and an 8bit Cyrillic body.
    non_latin_bin_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: báz
        Subject: Maintenant je vous présente mon collègue, le pouf célèbre
        \tJean de Baddie
        Mime-Version: 1.0
        Content-Type: text/plain; charset="utf-8"
        Content-Transfer-Encoding: 8bit

        Да, они летят.
        """).encode('utf-8')

    def test_bytes_generator(self):
        # BytesGenerator must reproduce the binary input unchanged.
        msg = email.message_from_bytes(self.non_latin_bin_msg)
        out = BytesIO()
        email.generator.BytesGenerator(out).flatten(msg)
        self.assertEqual(out.getvalue(), self.non_latin_bin_msg)

    def test_bytes_generator_handles_None_body(self):
        # Issue 11019: a freshly created Message flattens to just a newline.
        msg = email.message.Message()
        out = BytesIO()
        email.generator.BytesGenerator(out).flatten(msg)
        self.assertEqual(out.getvalue(), b"\n")

    # Expected text rendering of non_latin_bin_msg when flattened by the
    # string Generator (encoded headers, base64 body).
    # NOTE(review): the two Subject continuation lines are reproduced with
    # the same indentation as the other lines, as they appear here.  The
    # folded value in headertest_headers starts its continuation line with
    # a space, so these lines may need one extra leading space -- confirm.
    non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\
        From: foo@bar.com
        To: =?unknown-8bit?q?b=C3=A1z?=
        Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=
        =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=
        =?unknown-8bit?q?_Jean_de_Baddie?=
        Mime-Version: 1.0
        Content-Type: text/plain; charset="utf-8"
        Content-Transfer-Encoding: base64

        0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
        """)

    def test_generator_handles_8bit(self):
        # The string Generator must emit the 7bit-safe, wrapped rendering.
        msg = email.message_from_bytes(self.non_latin_bin_msg)
        out = StringIO()
        email.generator.Generator(out).flatten(msg)
        self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped)

    def test_bytes_generator_with_unix_from(self):
        # The unixfrom contains a current date, so we can't check it
        # literally.  Just make sure the first word is 'From' and the
        # rest of the message matches the input.
        msg = email.message_from_bytes(self.non_latin_bin_msg)
        out = BytesIO()
        email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
        lines = out.getvalue().split(b'\n')
        self.assertEqual(lines[0].split()[0], b'From')
        self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)

    # Same as the wrapped rendering, except that the first two Subject
    # lines (list items 2 and 3) are merged into one longer line.
    non_latin_bin_msg_as7bit = non_latin_bin_msg_as7bit_wrapped.split('\n')
    non_latin_bin_msg_as7bit[2:4] = [
        'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
        'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
    non_latin_bin_msg_as7bit = '\n'.join(non_latin_bin_msg_as7bit)

    def test_message_from_binary_file(self):
        # Round trip through a real file opened in binary mode; the file is
        # removed again by the registered cleanup.
        fn = 'test.msg'
        self.addCleanup(unlink, fn)
        with open(fn, 'wb') as testfile:
            testfile.write(self.non_latin_bin_msg)
        with open(fn, 'rb') as testfile:
            m = email.parser.BytesParser().parse(testfile)
        self.assertEqual(str(m), self.non_latin_bin_msg_as7bit)

    # latin-1 encoded message with an 8bit body.
    latin_bin_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: Dinsdale
        Subject: Nudge nudge, wink, wink
        Mime-Version: 1.0
        Content-Type: text/plain; charset="latin-1"
        Content-Transfer-Encoding: 8bit

        oh là là, know what I mean, know what I mean?
        """).encode('latin-1')

    # Expected text rendering of latin_bin_msg: the charset is reported as
    # iso-8859-1 and the body is quoted-printable encoded.
    latin_bin_msg_as7bit = textwrap.dedent("""\
        From: foo@bar.com
        To: Dinsdale
        Subject: Nudge nudge, wink, wink
        Mime-Version: 1.0
        Content-Type: text/plain; charset="iso-8859-1"
        Content-Transfer-Encoding: quoted-printable

        oh l=E0 l=E0, know what I mean, know what I mean?
        """)

    def test_string_generator_reencodes_to_quopri_when_appropriate(self):
        # str() of the parsed message must equal the quoted-printable form.
        m = email.message_from_bytes(self.latin_bin_msg)
        self.assertEqual(str(m), self.latin_bin_msg_as7bit)

    def test_decoded_generator_emits_unicode_body(self):
        m = email.message_from_bytes(self.latin_bin_msg)
        out = StringIO()
        email.generator.DecodedGenerator(out).flatten(m)
        # DecodedGenerator output contains an extra blank line compared
        # to the input message.  RDM: not sure if this is a bug or not,
        # but it is not specific to the 8bit->7bit conversion.
        self.assertEqual(out.getvalue(),
                         self.latin_bin_msg.decode('latin-1')+'\n')

    def test_bytes_feedparser(self):
        # Feed the message to BytesFeedParser in 10-byte slices; the result
        # must match the one-shot parse.
        bfp = email.feedparser.BytesFeedParser()
        for i in range(0, len(self.latin_bin_msg), 10):
            bfp.feed(self.latin_bin_msg[i:i+10])
        m = bfp.close()
        self.assertEqual(str(m), self.latin_bin_msg_as7bit)

    def test_crlf_flatten(self):
        # Flattening with linesep='\r\n' must reproduce the bytes of
        # msg_26.txt exactly.
        with openfile('msg_26.txt', 'rb') as fp:
            text = fp.read()
        msg = email.message_from_bytes(text)
        s = BytesIO()
        g = email.generator.BytesGenerator(s)
        g.flatten(msg, linesep='\r\n')
        self.assertEqual(s.getvalue(), text)
    # unittest setting: do not truncate diffs in failure messages.
    maxDiff = None
3103
Ezio Melottib3aedd42010-11-20 19:04:17 +00003104
class BaseTestBytesGeneratorIdempotent:
    """Mixin that checks messages survive a bytes parse/flatten round trip.

    The methods read ``linesep``, ``blinesep`` and
    ``normalize_linesep_regex`` from ``self`` and call
    ``self.assertListEqual``, so a concrete class has to supply all four.
    """

    maxDiff = None

    def _msgobj(self, filename):
        # Read the file as bytes and rewrite every match of the
        # normalising regex to the configured line separator before parsing.
        with openfile(filename, 'rb') as fp:
            raw = fp.read()
        normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(normalized), normalized

    def _idempotent(self, msg, data, unixfrom=False):
        # Flatten without header wrapping and compare with the input bytes.
        sink = BytesIO()
        generator = email.generator.BytesGenerator(sink, maxheaderlen=0)
        generator.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertByteStringsEqual(data, sink.getvalue())

    def assertByteStringsEqual(self, str1, str2):
        # Not using self.blinesep here is intentional.  This way the output
        # is more useful when the failure results in mixed line endings.
        first_lines = str1.split(b'\n')
        second_lines = str2.split(b'\n')
        self.assertListEqual(first_lines, second_lines)
3126
3127
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                     TestIdempotent):
    """Idempotency tests run with LF line endings."""

    # Pattern whose matches (CRLF) are rewritten to ``blinesep``.
    normalize_linesep_regex = re.compile(br'\r\n')
    blinesep = b'\n'
    linesep = '\n'
3133
3134
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    """Idempotency tests run with CRLF line endings."""

    # Pattern whose matches (an LF not preceded by CR) are rewritten to
    # ``blinesep``.
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
    blinesep = b'\r\n'
    linesep = '\r\n'
3140
Ezio Melottib3aedd42010-11-20 19:04:17 +00003141
class TestBase64(unittest.TestCase):
    """Tests for the helpers in email.base64mime."""

    def test_len(self):
        check = self.assertEqual
        check(base64mime.header_length('hello'),
              len(base64mime.body_encode(b'hello', eol='')))
        for size in range(15):
            # Four output characters for every started group of three
            # input characters: 0, 4, 8, 12, 16, 20 over sizes 0..14.
            bsize = 4 * ((size + 2) // 3)
            check(base64mime.header_length('x' * size), bsize)

    def test_decode(self):
        cases = (
            ('', b''),
            ('aGVsbG8=', b'hello'),
        )
        for encoded, expected in cases:
            self.assertEqual(base64mime.decode(encoded), expected)

    def test_encode(self):
        check = self.assertEqual
        encode = base64mime.body_encode
        check(encode(b''), b'')
        check(encode(b'hello'), 'aGVsbG8=\n')
        # Input that itself ends with a newline
        check(encode(b'hello\n'), 'aGVsbG8K\n')
        # Test the maxlinelen arg
        payload = b'xxxx ' * 20
        check(encode(payload, maxlinelen=40), """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IA==
""")
        # Test the eol argument
        check(encode(payload, maxlinelen=40, eol='\r\n'),
              """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IA==\r
""")

    def test_header_encode(self):
        he = base64mime.header_encode
        # Default charset, including embedded line breaks.
        default_cases = (
            ('hello', '=?iso-8859-1?b?aGVsbG8=?='),
            ('hello\r\nworld', '=?iso-8859-1?b?aGVsbG8NCndvcmxk?='),
            ('hello\nworld', '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?='),
        )
        for text, expected in default_cases:
            self.assertEqual(he(text), expected)
        # Test the charset option
        self.assertEqual(he('hello', charset='iso-8859-2'),
                         '=?iso-8859-2?b?aGVsbG8=?=')
        self.assertEqual(he('hello\nworld'),
                         '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003192
3193
Ezio Melottib3aedd42010-11-20 19:04:17 +00003194
class TestQuopri(unittest.TestCase):
    # Tests for the low-level helpers in email.quoprimime.

    def setUp(self):
        # Set of characters (as byte integers) that don't need to be encoded
        # in headers.
        self.hlit = list(chain(
            range(ord('a'), ord('z') + 1),
            range(ord('A'), ord('Z') + 1),
            range(ord('0'), ord('9') + 1),
            b'!*+-/'))
        # Set of characters (as byte integers) that do need to be encoded in
        # headers.
        self.hnon = [c for c in range(256) if c not in self.hlit]
        # Sanity check on the fixture itself.  A real assertion method is
        # used so the check is not stripped when running under -O.
        self.assertEqual(len(self.hlit) + len(self.hnon), 256)
        # Set of characters (as byte integers) that don't need to be encoded
        # in bodies.
        self.blit = list(range(ord(' '), ord('~') + 1))
        self.blit.append(ord('\t'))
        self.blit.remove(ord('='))
        # Set of characters (as byte integers) that do need to be encoded in
        # bodies.
        self.bnon = [c for c in range(256) if c not in self.blit]
        self.assertEqual(len(self.blit) + len(self.bnon), 256)

    def test_quopri_header_check(self):
        for c in self.hlit:
            self.assertFalse(quoprimime.header_check(c),
                        'Should not be header quopri encoded: %s' % chr(c))
        for c in self.hnon:
            self.assertTrue(quoprimime.header_check(c),
                            'Should be header quopri encoded: %s' % chr(c))

    def test_quopri_body_check(self):
        for c in self.blit:
            self.assertFalse(quoprimime.body_check(c),
                        'Should not be body quopri encoded: %s' % chr(c))
        for c in self.bnon:
            self.assertTrue(quoprimime.body_check(c),
                            'Should be body quopri encoded: %s' % chr(c))

    def test_header_quopri_len(self):
        eq = self.assertEqual
        eq(quoprimime.header_length(b'hello'), 5)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
           quoprimime.header_length(b'hello') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
           quoprimime.header_length(b'h@e@l@l@o@') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        for c in self.hlit:
            eq(quoprimime.header_length(bytes([c])), 1,
               'expected length 1 for %r' % chr(c))
        for c in self.hnon:
            # Space is special; it's encoded to _
            if c == ord(' '):
                continue
            eq(quoprimime.header_length(bytes([c])), 3,
               'expected length 3 for %r' % chr(c))
        eq(quoprimime.header_length(b' '), 1)

    def test_body_quopri_len(self):
        eq = self.assertEqual
        for c in self.blit:
            eq(quoprimime.body_length(bytes([c])), 1)
        for c in self.bnon:
            eq(quoprimime.body_length(bytes([c])), 3)

    def test_quote_unquote_idempotent(self):
        for x in range(256):
            c = chr(x)
            self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)

    def test_header_encode(self):
        eq = self.assertEqual
        he = quoprimime.header_encode
        eq(he(b'hello'), '=?iso-8859-1?q?hello?=')
        eq(he(b'hello', charset='iso-8859-2'), '=?iso-8859-2?q?hello?=')
        eq(he(b'hello\nworld'), '=?iso-8859-1?q?hello=0Aworld?=')
        # Test a non-ASCII character
        eq(he(b'hello\xc7there'), '=?iso-8859-1?q?hello=C7there?=')

    def test_decode(self):
        eq = self.assertEqual
        eq(quoprimime.decode(''), '')
        eq(quoprimime.decode('hello'), 'hello')
        eq(quoprimime.decode('hello', 'X'), 'hello')
        eq(quoprimime.decode('hello\nworld', 'X'), 'helloXworld')

    def test_encode(self):
        eq = self.assertEqual
        eq(quoprimime.body_encode(''), '')
        eq(quoprimime.body_encode('hello'), 'hello')
        # A CRLF in the input comes out as the default eol
        eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
        # Test the maxlinelen arg
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
x xxxx xxxx xxxx xxxx=20""")
        # Test the eol argument
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
x xxxx xxxx xxxx xxxx=20""")
        # Blank lines in the input are preserved
        eq(quoprimime.body_encode("""\
one line

two line"""), """\
one line

two line""")
3311
3312
Ezio Melottib3aedd42010-11-20 19:04:17 +00003313
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003314# Test the Charset class
3315class TestCharset(unittest.TestCase):
3316 def tearDown(self):
3317 from email import charset as CharsetModule
3318 try:
3319 del CharsetModule.CHARSETS['fake']
3320 except KeyError:
3321 pass
3322
Guido van Rossum9604e662007-08-30 03:46:43 +00003323 def test_codec_encodeable(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003324 eq = self.assertEqual
3325 # Make sure us-ascii = no Unicode conversion
3326 c = Charset('us-ascii')
Guido van Rossum9604e662007-08-30 03:46:43 +00003327 eq(c.header_encode('Hello World!'), 'Hello World!')
3328 # Test 8-bit idempotency with us-ascii
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003329 s = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
Guido van Rossum9604e662007-08-30 03:46:43 +00003330 self.assertRaises(UnicodeError, c.header_encode, s)
3331 c = Charset('utf-8')
3332 eq(c.header_encode(s), '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003333
3334 def test_body_encode(self):
3335 eq = self.assertEqual
3336 # Try a charset with QP body encoding
3337 c = Charset('iso-8859-1')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00003338 eq('hello w=F6rld', c.body_encode('hello w\xf6rld'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003339 # Try a charset with Base64 body encoding
3340 c = Charset('utf-8')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00003341 eq('aGVsbG8gd29ybGQ=\n', c.body_encode(b'hello world'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003342 # Try a charset with None body encoding
3343 c = Charset('us-ascii')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00003344 eq('hello world', c.body_encode('hello world'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003345 # Try the convert argument, where input codec != output codec
3346 c = Charset('euc-jp')
3347 # With apologies to Tokio Kikuchi ;)
Barry Warsawbef9d212007-08-31 10:55:37 +00003348 # XXX FIXME
3349## try:
3350## eq('\x1b$B5FCO;~IW\x1b(B',
3351## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
3352## eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
3353## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
3354## except LookupError:
3355## # We probably don't have the Japanese codecs installed
3356## pass
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003357 # Testing SF bug #625509, which we have to fake, since there are no
3358 # built-in encodings where the header encoding is QP but the body
3359 # encoding is not.
3360 from email import charset as CharsetModule
3361 CharsetModule.add_charset('fake', CharsetModule.QP, None)
3362 c = Charset('fake')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00003363 eq('hello w\xf6rld', c.body_encode('hello w\xf6rld'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003364
3365 def test_unicode_charset_name(self):
3366 charset = Charset('us-ascii')
3367 self.assertEqual(str(charset), 'us-ascii')
3368 self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
3369
3370
Ezio Melottib3aedd42010-11-20 19:04:17 +00003371
# Test multilingual MIME headers.
class TestHeader(TestEmailBase):
    def test_simple(self):
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        h.append(' Goodbye World!')
        eq(h.encode(), 'Hello World!  Goodbye World!')

    def test_simple_surprise(self):
        # Appending without a leading space still yields a single space
        # between the two chunks.
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        h.append('Goodbye World!')
        eq(h.encode(), 'Hello World! Goodbye World!')

    def test_header_needs_no_decoding(self):
        h = 'no decoding needed'
        self.assertEqual(decode_header(h), [(h, None)])

    def test_long(self):
        h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
                   maxlinelen=76)
        # assertLessEqual reports the offending length on failure.
        for line in h.encode(splitchars=' ').split('\n '):
            self.assertLessEqual(len(line), 76)

    def test_multilingual(self):
        eq = self.ndiffAssertEqual
        g = Charset("iso-8859-1")
        cz = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        g_head = (b'Die Mieter treten hier ein werden mit einem '
                  b'Foerderband komfortabel den Korridor entlang, '
                  b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
                  b'gegen die rotierenden Klingen bef\xf6rdert. ')
        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        h = Header(g_head, g)
        h.append(cz_head, cz)
        h.append(utf8_head, utf8)
        enc = h.encode(maxlinelen=76)
        eq(enc, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
        # Decoding must give back the three original chunks with their
        # charsets.
        decoded = decode_header(enc)
        eq(len(decoded), 3)
        eq(decoded[0], (g_head, 'iso-8859-1'))
        eq(decoded[1], (cz_head, 'iso-8859-2'))
        eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
        ustr = str(h)
        eq(ustr,
           (b'Die Mieter treten hier ein werden mit einem Foerderband '
            b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
            b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
            b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
            b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
            b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
            b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
            b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
            b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
            b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
            b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
            b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
            b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
            b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
            b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
            b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
            ).decode('utf-8'))
        # Test make_header()
        newh = make_header(decode_header(enc))
        eq(newh, h)

    def test_empty_header_encode(self):
        h = Header()
        self.assertEqual(h.encode(), '')

    def test_header_ctor_default_args(self):
        eq = self.ndiffAssertEqual
        h = Header()
        eq(h, '')
        h.append('foo', Charset('iso-8859-1'))
        eq(h, 'foo')

    def test_explicit_maxlinelen(self):
        eq = self.ndiffAssertEqual
        hstr = ('A very long line that must get split to something other '
                'than at the 76th character boundary to test the non-default '
                'behavior')
        # Default line length, no header name.
        h = Header(hstr)
        eq(h.encode(), '''\
A very long line that must get split to something other than at the 76th
 character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        # Supplying a header name moves the split point on the first line.
        h = Header(hstr, header_name='Subject')
        eq(h.encode(), '''\
A very long line that must get split to something other than at the
 76th character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        # With a large enough maxlinelen nothing is split.
        h = Header(hstr, maxlinelen=1024, header_name='Subject')
        eq(h.encode(), hstr)
        eq(str(h), hstr)

    def test_quopri_splittable(self):
        eq = self.ndiffAssertEqual
        h = Header(charset='iso-8859-1', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_?=""")
        eq(x, str(make_header(decode_header(s))))
        # Same payload with a wider line limit.
        h = Header(charset='iso-8859-1', maxlinelen=40)
        h.append(x)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
 =?iso-8859-1?q?_xxxx_xxxx_?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_base64_splittable(self):
        eq = self.ndiffAssertEqual
        h = Header(charset='koi8-r', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IA==?=""")
        eq(x, str(make_header(decode_header(s))))
        # Same payload with a wider line limit.
        h = Header(charset='koi8-r', maxlinelen=40)
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_us_ascii_header(self):
        eq = self.assertEqual
        s = 'hello'
        x = decode_header(s)
        eq(x, [('hello', None)])
        h = make_header(x)
        eq(s, h.encode())

    def test_string_charset(self):
        # The charset may be given by name instead of as a Charset object.
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        eq(h, 'hello')

##    def test_unicode_error(self):
##        raises = self.assertRaises
##        raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
##        h = Header()
##        raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')

    def test_utf8_shortest(self):
        eq = self.assertEqual
        h = Header('p\xf6stal', 'utf-8')
        eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
        h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
        eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')

    def test_bad_8bit_header(self):
        raises = self.assertRaises
        eq = self.assertEqual
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        raises(UnicodeError, Header, x)
        h = Header()
        raises(UnicodeError, h.append, x)
        # With errors='replace' the undecodable byte is substituted
        # instead of raising.
        e = x.decode('utf-8', 'replace')
        eq(str(Header(x, errors='replace')), e)
        h.append(x, errors='replace')
        eq(str(h), e)

    def test_encoded_adjacent_nonencoded(self):
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        h.append('world')
        s = h.encode()
        eq(s, '=?iso-8859-1?q?hello?= world')
        h = make_header(decode_header(s))
        eq(h.encode(), s)

    def test_whitespace_eater(self):
        eq = self.assertEqual
        s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
        parts = decode_header(s)
        # The two adjacent koi8-r encoded words come back as one chunk.
        eq(parts, [(b'Subject:', None),
                   (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 '
                    b'\xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'),
                   (b'zz.', None)])
        hdr = make_header(parts)
        eq(hdr.encode(),
           'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')

    def test_broken_base64_header(self):
        raises = self.assertRaises
        s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
        raises(errors.HeaderParseError, decode_header, s)

    def test_shift_jis_charset(self):
        # A shift_jis header is emitted as an iso-2022-jp encoded word.
        h = Header('文', charset='shift_jis')
        self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')
3683
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003684
Ezio Melottib3aedd42010-11-20 19:04:17 +00003685
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003686# Test RFC 2231 header parameters (en/de)coding
3687class TestRFC2231(TestEmailBase):
def test_get_param(self):
    # msg_29.txt carries an RFC 2231 encoded 'title' parameter.
    msg = self._msgobj('msg_29.txt')
    unquoted = msg.get_param('title')
    self.assertEqual(
        unquoted,
        ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
    # With unquote=False the surrounding double quotes are kept.
    quoted = msg.get_param('title', unquote=False)
    self.assertEqual(
        quoted,
        ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))
3695
def test_set_param(self):
    check = self.ndiffAssertEqual
    title = 'This is even more ***fun*** isn\'t it!'
    msg = Message()
    # Charset only: the language slot comes back as an empty string.
    msg.set_param('title', title, charset='us-ascii')
    check(msg.get_param('title'), ('us-ascii', '', title))
    # Charset and language.
    msg.set_param('title', title, charset='us-ascii', language='en')
    check(msg.get_param('title'), ('us-ascii', 'en', title))
    # Setting the parameter on a parsed message shows up, RFC 2231
    # encoded, in the flattened Content-Type header.
    msg = self._msgobj('msg_01.txt')
    msg.set_param('title', title, charset='us-ascii', language='en')
    flattened = msg.as_string(maxheaderlen=78)
    check(flattened, """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")
3732
def test_del_param(self):
    msg = self._msgobj('msg_01.txt')
    # Add two RFC 2231 parameters, then remove one of them again.
    msg.set_param('foo', 'bar', charset='us-ascii', language='en')
    msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                  charset='us-ascii', language='en')
    msg.del_param('foo', header='Content-Type')
    # Only 'title' must be left on the Content-Type header.
    flattened = msg.as_string(maxheaderlen=78)
    self.ndiffAssertEqual(flattened, """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")
3762
def test_rfc2231_get_content_charset(self):
    # The charset of the msg_32.txt fixture must be reported as us-ascii.
    parsed = self._msgobj('msg_32.txt')
    self.assertEqual(parsed.get_content_charset(), 'us-ascii')
3767
R. David Murraydfd7eb02010-12-24 22:36:49 +00003768 def test_rfc2231_parse_rfc_quoting(self):
3769 m = textwrap.dedent('''\
3770 Content-Disposition: inline;
3771 \tfilename*0*=''This%20is%20even%20more%20;
3772 \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
3773 \tfilename*2="is it not.pdf"
3774
3775 ''')
3776 msg = email.message_from_string(m)
3777 self.assertEqual(msg.get_filename(),
3778 'This is even more ***fun*** is it not.pdf')
3779 self.assertEqual(m, msg.as_string())
3780
3781 def test_rfc2231_parse_extra_quoting(self):
3782 m = textwrap.dedent('''\
3783 Content-Disposition: inline;
3784 \tfilename*0*="''This%20is%20even%20more%20";
3785 \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
3786 \tfilename*2="is it not.pdf"
3787
3788 ''')
3789 msg = email.message_from_string(m)
3790 self.assertEqual(msg.get_filename(),
3791 'This is even more ***fun*** is it not.pdf')
3792 self.assertEqual(m, msg.as_string())
3793
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003794 def test_rfc2231_no_language_or_charset(self):
3795 m = '''\
3796Content-Transfer-Encoding: 8bit
3797Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
3798Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm
3799
3800'''
3801 msg = email.message_from_string(m)
3802 param = msg.get_param('NAME')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003803 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003804 self.assertEqual(
3805 param,
3806 'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')
3807
3808 def test_rfc2231_no_language_or_charset_in_filename(self):
3809 m = '''\
3810Content-Disposition: inline;
3811\tfilename*0*="''This%20is%20even%20more%20";
3812\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
3813\tfilename*2="is it not.pdf"
3814
3815'''
3816 msg = email.message_from_string(m)
3817 self.assertEqual(msg.get_filename(),
3818 'This is even more ***fun*** is it not.pdf')
3819
3820 def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
3821 m = '''\
3822Content-Disposition: inline;
3823\tfilename*0*="''This%20is%20even%20more%20";
3824\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
3825\tfilename*2="is it not.pdf"
3826
3827'''
3828 msg = email.message_from_string(m)
3829 self.assertEqual(msg.get_filename(),
3830 'This is even more ***fun*** is it not.pdf')
3831
3832 def test_rfc2231_partly_encoded(self):
3833 m = '''\
3834Content-Disposition: inline;
3835\tfilename*0="''This%20is%20even%20more%20";
3836\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
3837\tfilename*2="is it not.pdf"
3838
3839'''
3840 msg = email.message_from_string(m)
3841 self.assertEqual(
3842 msg.get_filename(),
3843 'This%20is%20even%20more%20***fun*** is it not.pdf')
3844
3845 def test_rfc2231_partly_nonencoded(self):
3846 m = '''\
3847Content-Disposition: inline;
3848\tfilename*0="This%20is%20even%20more%20";
3849\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
3850\tfilename*2="is it not.pdf"
3851
3852'''
3853 msg = email.message_from_string(m)
3854 self.assertEqual(
3855 msg.get_filename(),
3856 'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')
3857
3858 def test_rfc2231_no_language_or_charset_in_boundary(self):
3859 m = '''\
3860Content-Type: multipart/alternative;
3861\tboundary*0*="''This%20is%20even%20more%20";
3862\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
3863\tboundary*2="is it not.pdf"
3864
3865'''
3866 msg = email.message_from_string(m)
3867 self.assertEqual(msg.get_boundary(),
3868 'This is even more ***fun*** is it not.pdf')
3869
3870 def test_rfc2231_no_language_or_charset_in_charset(self):
3871 # This is a nonsensical charset value, but tests the code anyway
3872 m = '''\
3873Content-Type: text/plain;
3874\tcharset*0*="This%20is%20even%20more%20";
3875\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
3876\tcharset*2="is it not.pdf"
3877
3878'''
3879 msg = email.message_from_string(m)
3880 self.assertEqual(msg.get_content_charset(),
3881 'this is even more ***fun*** is it not.pdf')
3882
3883 def test_rfc2231_bad_encoding_in_filename(self):
3884 m = '''\
3885Content-Disposition: inline;
3886\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
3887\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
3888\tfilename*2="is it not.pdf"
3889
3890'''
3891 msg = email.message_from_string(m)
3892 self.assertEqual(msg.get_filename(),
3893 'This is even more ***fun*** is it not.pdf')
3894
3895 def test_rfc2231_bad_encoding_in_charset(self):
3896 m = """\
3897Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D
3898
3899"""
3900 msg = email.message_from_string(m)
3901 # This should return None because non-ascii characters in the charset
3902 # are not allowed.
3903 self.assertEqual(msg.get_content_charset(), None)
3904
3905 def test_rfc2231_bad_character_in_charset(self):
3906 m = """\
3907Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D
3908
3909"""
3910 msg = email.message_from_string(m)
3911 # This should return None because non-ascii characters in the charset
3912 # are not allowed.
3913 self.assertEqual(msg.get_content_charset(), None)
3914
3915 def test_rfc2231_bad_character_in_filename(self):
3916 m = '''\
3917Content-Disposition: inline;
3918\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
3919\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
3920\tfilename*2*="is it not.pdf%E2"
3921
3922'''
3923 msg = email.message_from_string(m)
3924 self.assertEqual(msg.get_filename(),
3925 'This is even more ***fun*** is it not.pdf\ufffd')
3926
3927 def test_rfc2231_unknown_encoding(self):
3928 m = """\
3929Content-Transfer-Encoding: 8bit
3930Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt
3931
3932"""
3933 msg = email.message_from_string(m)
3934 self.assertEqual(msg.get_filename(), 'myfile.txt')
3935
3936 def test_rfc2231_single_tick_in_filename_extended(self):
3937 eq = self.assertEqual
3938 m = """\
3939Content-Type: application/x-foo;
3940\tname*0*=\"Frank's\"; name*1*=\" Document\"
3941
3942"""
3943 msg = email.message_from_string(m)
3944 charset, language, s = msg.get_param('name')
3945 eq(charset, None)
3946 eq(language, None)
3947 eq(s, "Frank's Document")
3948
3949 def test_rfc2231_single_tick_in_filename(self):
3950 m = """\
3951Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"
3952
3953"""
3954 msg = email.message_from_string(m)
3955 param = msg.get_param('name')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003956 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003957 self.assertEqual(param, "Frank's Document")
3958
3959 def test_rfc2231_tick_attack_extended(self):
3960 eq = self.assertEqual
3961 m = """\
3962Content-Type: application/x-foo;
3963\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"
3964
3965"""
3966 msg = email.message_from_string(m)
3967 charset, language, s = msg.get_param('name')
3968 eq(charset, 'us-ascii')
3969 eq(language, 'en-us')
3970 eq(s, "Frank's Document")
3971
3972 def test_rfc2231_tick_attack(self):
3973 m = """\
3974Content-Type: application/x-foo;
3975\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"
3976
3977"""
3978 msg = email.message_from_string(m)
3979 param = msg.get_param('name')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003980 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003981 self.assertEqual(param, "us-ascii'en-us'Frank's Document")
3982
3983 def test_rfc2231_no_extended_values(self):
3984 eq = self.assertEqual
3985 m = """\
3986Content-Type: application/x-foo; name=\"Frank's Document\"
3987
3988"""
3989 msg = email.message_from_string(m)
3990 eq(msg.get_param('name'), "Frank's Document")
3991
3992 def test_rfc2231_encoded_then_unencoded_segments(self):
3993 eq = self.assertEqual
3994 m = """\
3995Content-Type: application/x-foo;
3996\tname*0*=\"us-ascii'en-us'My\";
3997\tname*1=\" Document\";
3998\tname*2*=\" For You\"
3999
4000"""
4001 msg = email.message_from_string(m)
4002 charset, language, s = msg.get_param('name')
4003 eq(charset, 'us-ascii')
4004 eq(language, 'en-us')
4005 eq(s, 'My Document For You')
4006
4007 def test_rfc2231_unencoded_then_encoded_segments(self):
4008 eq = self.assertEqual
4009 m = """\
4010Content-Type: application/x-foo;
4011\tname*0=\"us-ascii'en-us'My\";
4012\tname*1*=\" Document\";
4013\tname*2*=\" For You\"
4014
4015"""
4016 msg = email.message_from_string(m)
4017 charset, language, s = msg.get_param('name')
4018 eq(charset, 'us-ascii')
4019 eq(language, 'en-us')
4020 eq(s, 'My Document For You')
4021
4022
Ezio Melottib3aedd42010-11-20 19:04:17 +00004023
R. David Murraya8f480f2010-01-16 18:30:03 +00004024# Tests to ensure that signed parts of an email are completely preserved, as
4025# required by RFC1847 section 2.1. Note that these are incomplete, because the
4026# email package does not currently always preserve the body. See issue 1670765.
class TestSigned(TestEmailBase):
    # Round-trip checks: the first MIME part of a message read from a data
    # file must be identical in the original text and in the regenerated
    # text.

    # Matches a '--<boundary>' line, the text after it, and the next line
    # that consists of the same '--<boundary>'; group(2) is the text in
    # between.  Compiled once for the class instead of on every comparison,
    # using the module-level 're' import.
    _first_part_re = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)

    def _msg_and_obj(self, filename):
        # Return (raw text of the data file, Message parsed from that text).
        with openfile(findfile(filename)) as fp:
            original = fp.read()
        msg = email.message_from_string(original)
        return original, msg

    def _first_part(self, text, label):
        # Return the text of the first MIME part found in `text`.  Fail the
        # test with a readable message, rather than an AttributeError on
        # None, when no pair of boundary lines can be found.
        match = self._first_part_re.search(text)
        self.assertIsNotNone(
            match, 'no pair of MIME boundary lines in %s text' % label)
        return match.group(2)

    def _signed_parts_eq(self, original, result):
        # Extract the first mime part of each message and require them to
        # be equal.
        inpart = self._first_part(original, 'original')
        outpart = self._first_part(result, 'generated')
        self.assertEqual(outpart, inpart)

    def test_long_headers_as_string(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)
4059
4060
Ezio Melottib3aedd42010-11-20 19:04:17 +00004061
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004062def _testclasses():
4063 mod = sys.modules[__name__]
4064 return [getattr(mod, name) for name in dir(mod) if name.startswith('Test')]
4065
4066
def suite():
    """Return one TestSuite holding the tests of every class from _testclasses().

    unittest.makeSuite() was deprecated in Python 3.11 and removed in 3.13.
    TestLoader.loadTestsFromTestCase() on a fresh loader (default 'test'
    method prefix) collects the same tests and works on every version.
    """
    loader = unittest.TestLoader()
    # Named 'combined' so the local does not shadow this function's own name.
    combined = unittest.TestSuite()
    for testclass in _testclasses():
        combined.addTest(loader.loadTestsFromTestCase(testclass))
    return combined
4072
4073
def test_main():
    """Pass each class from _testclasses() to run_unittest(), one call per class."""
    classes = _testclasses()
    for cls in classes:
        run_unittest(cls)
4077
4078
Ezio Melottib3aedd42010-11-20 19:04:17 +00004079
# When the file is executed directly, let unittest drive the run; it looks up
# the module-level name 'suite' to obtain the tests.
if __name__ == '__main__':
    unittest.main(defaultTest='suite')