blob: e4083ad63a62fa93f50d08a92d90554da426cb64 [file] [log] [blame]
# Copyright (C) 2001-2010 Python Software Foundation
# Contact: email-sig@python.org
# email package unit tests
4
5import os
R. David Murray719a4492010-11-21 16:53:48 +00006import re
Guido van Rossum8b3febe2007-08-30 01:15:14 +00007import sys
8import time
9import base64
10import difflib
11import unittest
12import warnings
R. David Murray96fd54e2010-10-08 15:55:28 +000013import textwrap
Guido van Rossum8b3febe2007-08-30 01:15:14 +000014
R. David Murray96fd54e2010-10-08 15:55:28 +000015from io import StringIO, BytesIO
Guido van Rossum8b3febe2007-08-30 01:15:14 +000016from itertools import chain
17
18import email
19
20from email.charset import Charset
21from email.header import Header, decode_header, make_header
22from email.parser import Parser, HeaderParser
23from email.generator import Generator, DecodedGenerator
24from email.message import Message
25from email.mime.application import MIMEApplication
26from email.mime.audio import MIMEAudio
27from email.mime.text import MIMEText
28from email.mime.image import MIMEImage
29from email.mime.base import MIMEBase
30from email.mime.message import MIMEMessage
31from email.mime.multipart import MIMEMultipart
32from email import utils
33from email import errors
34from email import encoders
35from email import iterators
36from email import base64mime
37from email import quoprimime
38
R. David Murray96fd54e2010-10-08 15:55:28 +000039from test.support import findfile, run_unittest, unlink
Guido van Rossum8b3febe2007-08-30 01:15:14 +000040from email.test import __file__ as landmark
41
42
# Small string constants shared by the tests in this module.
NL = '\n'
EMPTYSTRING = ''
SPACE = ' '
46
47
Ezio Melottib3aedd42010-11-20 19:04:17 +000048
def openfile(filename, *args, **kws):
    """Open *filename* from the 'data' directory next to the landmark module.

    Any extra positional and keyword arguments are passed straight through
    to the built-in open().
    """
    data_dir = os.path.join(os.path.dirname(landmark), 'data')
    return open(os.path.join(data_dir, filename), *args, **kws)
52
53
Ezio Melottib3aedd42010-11-20 19:04:17 +000054
# Base test class
class TestEmailBase(unittest.TestCase):
    def ndiffAssertEqual(self, first, second):
        """Like assertEqual except use ndiff for readable output."""
        if first != second:
            sfirst = str(first)
            ssecond = str(second)
            # Diff the repr() of each line so that whitespace and escape
            # differences are visible in the failure message.
            rfirst = [repr(line) for line in sfirst.splitlines()]
            rsecond = [repr(line) for line in ssecond.splitlines()]
            diff = difflib.ndiff(rfirst, rsecond)
            raise self.failureException(NL + NL.join(diff))

    def _msgobj(self, filename):
        """Parse the named file from the test data directory into a message.

        The name is handed directly to openfile(), which already resolves it
        against the data directory.  Routing it through findfile() first
        could return the absolute path of an unrelated same-named file found
        on sys.path, and that path would then be opened instead.
        """
        with openfile(filename) as fp:
            return email.message_from_file(fp)
70
71
Ezio Melottib3aedd42010-11-20 19:04:17 +000072
Guido van Rossum8b3febe2007-08-30 01:15:14 +000073# Test various aspects of the Message class's API
74class TestMessageAPI(TestEmailBase):
75 def test_get_all(self):
76 eq = self.assertEqual
77 msg = self._msgobj('msg_20.txt')
78 eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
79 eq(msg.get_all('xx', 'n/a'), 'n/a')
80
R. David Murraye5db2632010-11-20 15:10:13 +000081 def test_getset_charset(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +000082 eq = self.assertEqual
83 msg = Message()
84 eq(msg.get_charset(), None)
85 charset = Charset('iso-8859-1')
86 msg.set_charset(charset)
87 eq(msg['mime-version'], '1.0')
88 eq(msg.get_content_type(), 'text/plain')
89 eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
90 eq(msg.get_param('charset'), 'iso-8859-1')
91 eq(msg['content-transfer-encoding'], 'quoted-printable')
92 eq(msg.get_charset().input_charset, 'iso-8859-1')
93 # Remove the charset
94 msg.set_charset(None)
95 eq(msg.get_charset(), None)
96 eq(msg['content-type'], 'text/plain')
97 # Try adding a charset when there's already MIME headers present
98 msg = Message()
99 msg['MIME-Version'] = '2.0'
100 msg['Content-Type'] = 'text/x-weird'
101 msg['Content-Transfer-Encoding'] = 'quinted-puntable'
102 msg.set_charset(charset)
103 eq(msg['mime-version'], '2.0')
104 eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
105 eq(msg['content-transfer-encoding'], 'quinted-puntable')
106
107 def test_set_charset_from_string(self):
108 eq = self.assertEqual
109 msg = Message()
110 msg.set_charset('us-ascii')
111 eq(msg.get_charset().input_charset, 'us-ascii')
112 eq(msg['content-type'], 'text/plain; charset="us-ascii"')
113
114 def test_set_payload_with_charset(self):
115 msg = Message()
116 charset = Charset('iso-8859-1')
117 msg.set_payload('This is a string payload', charset)
118 self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')
119
120 def test_get_charsets(self):
121 eq = self.assertEqual
122
123 msg = self._msgobj('msg_08.txt')
124 charsets = msg.get_charsets()
125 eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])
126
127 msg = self._msgobj('msg_09.txt')
128 charsets = msg.get_charsets('dingbat')
129 eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
130 'koi8-r'])
131
132 msg = self._msgobj('msg_12.txt')
133 charsets = msg.get_charsets()
134 eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
135 'iso-8859-3', 'us-ascii', 'koi8-r'])
136
137 def test_get_filename(self):
138 eq = self.assertEqual
139
140 msg = self._msgobj('msg_04.txt')
141 filenames = [p.get_filename() for p in msg.get_payload()]
142 eq(filenames, ['msg.txt', 'msg.txt'])
143
144 msg = self._msgobj('msg_07.txt')
145 subpart = msg.get_payload(1)
146 eq(subpart.get_filename(), 'dingusfish.gif')
147
148 def test_get_filename_with_name_parameter(self):
149 eq = self.assertEqual
150
151 msg = self._msgobj('msg_44.txt')
152 filenames = [p.get_filename() for p in msg.get_payload()]
153 eq(filenames, ['msg.txt', 'msg.txt'])
154
155 def test_get_boundary(self):
156 eq = self.assertEqual
157 msg = self._msgobj('msg_07.txt')
158 # No quotes!
159 eq(msg.get_boundary(), 'BOUNDARY')
160
161 def test_set_boundary(self):
162 eq = self.assertEqual
163 # This one has no existing boundary parameter, but the Content-Type:
164 # header appears fifth.
165 msg = self._msgobj('msg_01.txt')
166 msg.set_boundary('BOUNDARY')
167 header, value = msg.items()[4]
168 eq(header.lower(), 'content-type')
169 eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
170 # This one has a Content-Type: header, with a boundary, stuck in the
171 # middle of its headers. Make sure the order is preserved; it should
172 # be fifth.
173 msg = self._msgobj('msg_04.txt')
174 msg.set_boundary('BOUNDARY')
175 header, value = msg.items()[4]
176 eq(header.lower(), 'content-type')
177 eq(value, 'multipart/mixed; boundary="BOUNDARY"')
178 # And this one has no Content-Type: header at all.
179 msg = self._msgobj('msg_03.txt')
180 self.assertRaises(errors.HeaderParseError,
181 msg.set_boundary, 'BOUNDARY')
182
R. David Murray73a559d2010-12-21 18:07:59 +0000183 def test_make_boundary(self):
184 msg = MIMEMultipart('form-data')
185 # Note that when the boundary gets created is an implementation
186 # detail and might change.
187 self.assertEqual(msg.items()[0][1], 'multipart/form-data')
188 # Trigger creation of boundary
189 msg.as_string()
190 self.assertEqual(msg.items()[0][1][:33],
191 'multipart/form-data; boundary="==')
192 # XXX: there ought to be tests of the uniqueness of the boundary, too.
193
R. David Murray57c45ac2010-02-21 04:39:40 +0000194 def test_message_rfc822_only(self):
195 # Issue 7970: message/rfc822 not in multipart parsed by
196 # HeaderParser caused an exception when flattened.
Brett Cannon384917a2010-10-29 23:08:36 +0000197 with openfile(findfile('msg_46.txt')) as fp:
198 msgdata = fp.read()
R. David Murray57c45ac2010-02-21 04:39:40 +0000199 parser = HeaderParser()
200 msg = parser.parsestr(msgdata)
201 out = StringIO()
202 gen = Generator(out, True, 0)
203 gen.flatten(msg, False)
204 self.assertEqual(out.getvalue(), msgdata)
205
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000206 def test_get_decoded_payload(self):
207 eq = self.assertEqual
208 msg = self._msgobj('msg_10.txt')
209 # The outer message is a multipart
210 eq(msg.get_payload(decode=True), None)
211 # Subpart 1 is 7bit encoded
212 eq(msg.get_payload(0).get_payload(decode=True),
213 b'This is a 7bit encoded message.\n')
214 # Subpart 2 is quopri
215 eq(msg.get_payload(1).get_payload(decode=True),
216 b'\xa1This is a Quoted Printable encoded message!\n')
217 # Subpart 3 is base64
218 eq(msg.get_payload(2).get_payload(decode=True),
219 b'This is a Base64 encoded message.')
R. David Murray57a4b982010-03-08 02:17:03 +0000220 # Subpart 4 is base64 with a trailing newline, which
221 # used to be stripped (issue 7143).
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000222 eq(msg.get_payload(3).get_payload(decode=True),
R. David Murray57a4b982010-03-08 02:17:03 +0000223 b'This is a Base64 encoded message.\n')
224 # Subpart 5 has no Content-Transfer-Encoding: header.
225 eq(msg.get_payload(4).get_payload(decode=True),
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000226 b'This has no Content-Transfer-Encoding: header.\n')
227
228 def test_get_decoded_uu_payload(self):
229 eq = self.assertEqual
230 msg = Message()
231 msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
232 for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
233 msg['content-transfer-encoding'] = cte
234 eq(msg.get_payload(decode=True), b'hello world')
235 # Now try some bogus data
236 msg.set_payload('foo')
237 eq(msg.get_payload(decode=True), b'foo')
238
239 def test_decoded_generator(self):
240 eq = self.assertEqual
241 msg = self._msgobj('msg_07.txt')
242 with openfile('msg_17.txt') as fp:
243 text = fp.read()
244 s = StringIO()
245 g = DecodedGenerator(s)
246 g.flatten(msg)
247 eq(s.getvalue(), text)
248
249 def test__contains__(self):
250 msg = Message()
251 msg['From'] = 'Me'
252 msg['to'] = 'You'
253 # Check for case insensitivity
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000254 self.assertTrue('from' in msg)
255 self.assertTrue('From' in msg)
256 self.assertTrue('FROM' in msg)
257 self.assertTrue('to' in msg)
258 self.assertTrue('To' in msg)
259 self.assertTrue('TO' in msg)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000260
261 def test_as_string(self):
262 eq = self.ndiffAssertEqual
263 msg = self._msgobj('msg_01.txt')
264 with openfile('msg_01.txt') as fp:
265 text = fp.read()
266 eq(text, str(msg))
267 fullrepr = msg.as_string(unixfrom=True)
268 lines = fullrepr.split('\n')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000269 self.assertTrue(lines[0].startswith('From '))
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000270 eq(text, NL.join(lines[1:]))
271
272 def test_bad_param(self):
273 msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
274 self.assertEqual(msg.get_param('baz'), '')
275
276 def test_missing_filename(self):
277 msg = email.message_from_string("From: foo\n")
278 self.assertEqual(msg.get_filename(), None)
279
280 def test_bogus_filename(self):
281 msg = email.message_from_string(
282 "Content-Disposition: blarg; filename\n")
283 self.assertEqual(msg.get_filename(), '')
284
285 def test_missing_boundary(self):
286 msg = email.message_from_string("From: foo\n")
287 self.assertEqual(msg.get_boundary(), None)
288
289 def test_get_params(self):
290 eq = self.assertEqual
291 msg = email.message_from_string(
292 'X-Header: foo=one; bar=two; baz=three\n')
293 eq(msg.get_params(header='x-header'),
294 [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
295 msg = email.message_from_string(
296 'X-Header: foo; bar=one; baz=two\n')
297 eq(msg.get_params(header='x-header'),
298 [('foo', ''), ('bar', 'one'), ('baz', 'two')])
299 eq(msg.get_params(), None)
300 msg = email.message_from_string(
301 'X-Header: foo; bar="one"; baz=two\n')
302 eq(msg.get_params(header='x-header'),
303 [('foo', ''), ('bar', 'one'), ('baz', 'two')])
304
305 def test_get_param_liberal(self):
306 msg = Message()
307 msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
308 self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')
309
310 def test_get_param(self):
311 eq = self.assertEqual
312 msg = email.message_from_string(
313 "X-Header: foo=one; bar=two; baz=three\n")
314 eq(msg.get_param('bar', header='x-header'), 'two')
315 eq(msg.get_param('quuz', header='x-header'), None)
316 eq(msg.get_param('quuz'), None)
317 msg = email.message_from_string(
318 'X-Header: foo; bar="one"; baz=two\n')
319 eq(msg.get_param('foo', header='x-header'), '')
320 eq(msg.get_param('bar', header='x-header'), 'one')
321 eq(msg.get_param('baz', header='x-header'), 'two')
322 # XXX: We are not RFC-2045 compliant! We cannot parse:
323 # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
324 # msg.get_param("weird")
325 # yet.
326
327 def test_get_param_funky_continuation_lines(self):
328 msg = self._msgobj('msg_22.txt')
329 self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')
330
331 def test_get_param_with_semis_in_quotes(self):
332 msg = email.message_from_string(
333 'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
334 self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
335 self.assertEqual(msg.get_param('name', unquote=False),
336 '"Jim&amp;&amp;Jill"')
337
R. David Murrayd48739f2010-04-14 18:59:18 +0000338 def test_get_param_with_quotes(self):
339 msg = email.message_from_string(
340 'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
341 self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
342 msg = email.message_from_string(
343 "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
344 self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
345
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000346 def test_field_containment(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000347 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000348 msg = email.message_from_string('Header: exists')
349 unless('header' in msg)
350 unless('Header' in msg)
351 unless('HEADER' in msg)
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000352 self.assertFalse('headerx' in msg)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000353
354 def test_set_param(self):
355 eq = self.assertEqual
356 msg = Message()
357 msg.set_param('charset', 'iso-2022-jp')
358 eq(msg.get_param('charset'), 'iso-2022-jp')
359 msg.set_param('importance', 'high value')
360 eq(msg.get_param('importance'), 'high value')
361 eq(msg.get_param('importance', unquote=False), '"high value"')
362 eq(msg.get_params(), [('text/plain', ''),
363 ('charset', 'iso-2022-jp'),
364 ('importance', 'high value')])
365 eq(msg.get_params(unquote=False), [('text/plain', ''),
366 ('charset', '"iso-2022-jp"'),
367 ('importance', '"high value"')])
368 msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
369 eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')
370
371 def test_del_param(self):
372 eq = self.assertEqual
373 msg = self._msgobj('msg_05.txt')
374 eq(msg.get_params(),
375 [('multipart/report', ''), ('report-type', 'delivery-status'),
376 ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
377 old_val = msg.get_param("report-type")
378 msg.del_param("report-type")
379 eq(msg.get_params(),
380 [('multipart/report', ''),
381 ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
382 msg.set_param("report-type", old_val)
383 eq(msg.get_params(),
384 [('multipart/report', ''),
385 ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
386 ('report-type', old_val)])
387
388 def test_del_param_on_other_header(self):
389 msg = Message()
390 msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
391 msg.del_param('filename', 'content-disposition')
392 self.assertEqual(msg['content-disposition'], 'attachment')
393
394 def test_set_type(self):
395 eq = self.assertEqual
396 msg = Message()
397 self.assertRaises(ValueError, msg.set_type, 'text')
398 msg.set_type('text/plain')
399 eq(msg['content-type'], 'text/plain')
400 msg.set_param('charset', 'us-ascii')
401 eq(msg['content-type'], 'text/plain; charset="us-ascii"')
402 msg.set_type('text/html')
403 eq(msg['content-type'], 'text/html; charset="us-ascii"')
404
405 def test_set_type_on_other_header(self):
406 msg = Message()
407 msg['X-Content-Type'] = 'text/plain'
408 msg.set_type('application/octet-stream', 'X-Content-Type')
409 self.assertEqual(msg['x-content-type'], 'application/octet-stream')
410
411 def test_get_content_type_missing(self):
412 msg = Message()
413 self.assertEqual(msg.get_content_type(), 'text/plain')
414
415 def test_get_content_type_missing_with_default_type(self):
416 msg = Message()
417 msg.set_default_type('message/rfc822')
418 self.assertEqual(msg.get_content_type(), 'message/rfc822')
419
420 def test_get_content_type_from_message_implicit(self):
421 msg = self._msgobj('msg_30.txt')
422 self.assertEqual(msg.get_payload(0).get_content_type(),
423 'message/rfc822')
424
425 def test_get_content_type_from_message_explicit(self):
426 msg = self._msgobj('msg_28.txt')
427 self.assertEqual(msg.get_payload(0).get_content_type(),
428 'message/rfc822')
429
430 def test_get_content_type_from_message_text_plain_implicit(self):
431 msg = self._msgobj('msg_03.txt')
432 self.assertEqual(msg.get_content_type(), 'text/plain')
433
434 def test_get_content_type_from_message_text_plain_explicit(self):
435 msg = self._msgobj('msg_01.txt')
436 self.assertEqual(msg.get_content_type(), 'text/plain')
437
438 def test_get_content_maintype_missing(self):
439 msg = Message()
440 self.assertEqual(msg.get_content_maintype(), 'text')
441
442 def test_get_content_maintype_missing_with_default_type(self):
443 msg = Message()
444 msg.set_default_type('message/rfc822')
445 self.assertEqual(msg.get_content_maintype(), 'message')
446
447 def test_get_content_maintype_from_message_implicit(self):
448 msg = self._msgobj('msg_30.txt')
449 self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')
450
451 def test_get_content_maintype_from_message_explicit(self):
452 msg = self._msgobj('msg_28.txt')
453 self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')
454
455 def test_get_content_maintype_from_message_text_plain_implicit(self):
456 msg = self._msgobj('msg_03.txt')
457 self.assertEqual(msg.get_content_maintype(), 'text')
458
459 def test_get_content_maintype_from_message_text_plain_explicit(self):
460 msg = self._msgobj('msg_01.txt')
461 self.assertEqual(msg.get_content_maintype(), 'text')
462
463 def test_get_content_subtype_missing(self):
464 msg = Message()
465 self.assertEqual(msg.get_content_subtype(), 'plain')
466
467 def test_get_content_subtype_missing_with_default_type(self):
468 msg = Message()
469 msg.set_default_type('message/rfc822')
470 self.assertEqual(msg.get_content_subtype(), 'rfc822')
471
472 def test_get_content_subtype_from_message_implicit(self):
473 msg = self._msgobj('msg_30.txt')
474 self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')
475
476 def test_get_content_subtype_from_message_explicit(self):
477 msg = self._msgobj('msg_28.txt')
478 self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')
479
480 def test_get_content_subtype_from_message_text_plain_implicit(self):
481 msg = self._msgobj('msg_03.txt')
482 self.assertEqual(msg.get_content_subtype(), 'plain')
483
484 def test_get_content_subtype_from_message_text_plain_explicit(self):
485 msg = self._msgobj('msg_01.txt')
486 self.assertEqual(msg.get_content_subtype(), 'plain')
487
488 def test_get_content_maintype_error(self):
489 msg = Message()
490 msg['Content-Type'] = 'no-slash-in-this-string'
491 self.assertEqual(msg.get_content_maintype(), 'text')
492
493 def test_get_content_subtype_error(self):
494 msg = Message()
495 msg['Content-Type'] = 'no-slash-in-this-string'
496 self.assertEqual(msg.get_content_subtype(), 'plain')
497
498 def test_replace_header(self):
499 eq = self.assertEqual
500 msg = Message()
501 msg.add_header('First', 'One')
502 msg.add_header('Second', 'Two')
503 msg.add_header('Third', 'Three')
504 eq(msg.keys(), ['First', 'Second', 'Third'])
505 eq(msg.values(), ['One', 'Two', 'Three'])
506 msg.replace_header('Second', 'Twenty')
507 eq(msg.keys(), ['First', 'Second', 'Third'])
508 eq(msg.values(), ['One', 'Twenty', 'Three'])
509 msg.add_header('First', 'Eleven')
510 msg.replace_header('First', 'One Hundred')
511 eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
512 eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
513 self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')
514
515 def test_broken_base64_payload(self):
516 x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
517 msg = Message()
518 msg['content-type'] = 'audio/x-midi'
519 msg['content-transfer-encoding'] = 'base64'
520 msg.set_payload(x)
521 self.assertEqual(msg.get_payload(decode=True),
Guido van Rossum9604e662007-08-30 03:46:43 +0000522 bytes(x, 'raw-unicode-escape'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000523
R. David Murray7ec754b2010-12-13 23:51:19 +0000524 # Issue 1078919
525 def test_ascii_add_header(self):
526 msg = Message()
527 msg.add_header('Content-Disposition', 'attachment',
528 filename='bud.gif')
529 self.assertEqual('attachment; filename="bud.gif"',
530 msg['Content-Disposition'])
531
532 def test_noascii_add_header(self):
533 msg = Message()
534 msg.add_header('Content-Disposition', 'attachment',
535 filename="Fußballer.ppt")
536 self.assertEqual(
R. David Murraydfd7eb02010-12-24 22:36:49 +0000537 'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
R. David Murray7ec754b2010-12-13 23:51:19 +0000538 msg['Content-Disposition'])
539
540 def test_nonascii_add_header_via_triple(self):
541 msg = Message()
542 msg.add_header('Content-Disposition', 'attachment',
543 filename=('iso-8859-1', '', 'Fußballer.ppt'))
544 self.assertEqual(
R. David Murraydfd7eb02010-12-24 22:36:49 +0000545 'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
546 msg['Content-Disposition'])
547
548 def test_ascii_add_header_with_tspecial(self):
549 msg = Message()
550 msg.add_header('Content-Disposition', 'attachment',
551 filename="windows [filename].ppt")
552 self.assertEqual(
553 'attachment; filename="windows [filename].ppt"',
554 msg['Content-Disposition'])
555
556 def test_nonascii_add_header_with_tspecial(self):
557 msg = Message()
558 msg.add_header('Content-Disposition', 'attachment',
559 filename="Fußballer [filename].ppt")
560 self.assertEqual(
561 "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
R. David Murray7ec754b2010-12-13 23:51:19 +0000562 msg['Content-Disposition'])
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000563
R. David Murray5b2d9dd2011-01-09 02:35:24 +0000564 # Issue 5871: reject an attempt to embed a header inside a header value
565 # (header injection attack).
566 def test_embeded_header_via_Header_rejected(self):
567 msg = Message()
568 msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
569 self.assertRaises(errors.HeaderParseError, msg.as_string)
570
571 def test_embeded_header_via_string_rejected(self):
572 msg = Message()
573 msg['Dummy'] = 'dummy\nX-Injected-Header: test'
574 self.assertRaises(errors.HeaderParseError, msg.as_string)
575
Ezio Melottib3aedd42010-11-20 19:04:17 +0000576
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000577# Test the email.encoders module
578class TestEncoders(unittest.TestCase):
579 def test_encode_empty_payload(self):
580 eq = self.assertEqual
581 msg = Message()
582 msg.set_charset('us-ascii')
583 eq(msg['content-transfer-encoding'], '7bit')
584
585 def test_default_cte(self):
586 eq = self.assertEqual
Ezio Melottic303c122010-04-22 11:57:12 +0000587 # 7bit data and the default us-ascii _charset
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000588 msg = MIMEText('hello world')
589 eq(msg['content-transfer-encoding'], '7bit')
Ezio Melottic303c122010-04-22 11:57:12 +0000590 # Similar, but with 8bit data
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000591 msg = MIMEText('hello \xf8 world')
592 eq(msg['content-transfer-encoding'], '8bit')
593 # And now with a different charset
594 msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
595 eq(msg['content-transfer-encoding'], 'quoted-printable')
596
R. David Murraye85200d2010-05-06 01:41:14 +0000597 def test_encode7or8bit(self):
598 # Make sure a charset whose input character set is 8bit but
599 # whose output character set is 7bit gets a transfer-encoding
600 # of 7bit.
601 eq = self.assertEqual
R. David Murray850fc852010-06-03 01:58:28 +0000602 msg = MIMEText('文', _charset='euc-jp')
R. David Murraye85200d2010-05-06 01:41:14 +0000603 eq(msg['content-transfer-encoding'], '7bit')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000604
Ezio Melottib3aedd42010-11-20 19:04:17 +0000605
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000606# Test long header wrapping
607class TestLongHeaders(TestEmailBase):
608 def test_split_long_continuation(self):
609 eq = self.ndiffAssertEqual
610 msg = email.message_from_string("""\
611Subject: bug demonstration
612\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
613\tmore text
614
615test
616""")
617 sfp = StringIO()
618 g = Generator(sfp)
619 g.flatten(msg)
620 eq(sfp.getvalue(), """\
621Subject: bug demonstration
622\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
623\tmore text
624
625test
626""")
627
628 def test_another_long_almost_unsplittable_header(self):
629 eq = self.ndiffAssertEqual
630 hstr = """\
631bug demonstration
632\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
633\tmore text"""
634 h = Header(hstr, continuation_ws='\t')
635 eq(h.encode(), """\
636bug demonstration
637\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
638\tmore text""")
639 h = Header(hstr.replace('\t', ' '))
640 eq(h.encode(), """\
641bug demonstration
642 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
643 more text""")
644
645 def test_long_nonstring(self):
646 eq = self.ndiffAssertEqual
647 g = Charset("iso-8859-1")
648 cz = Charset("iso-8859-2")
649 utf8 = Charset("utf-8")
650 g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
651 b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
652 b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
653 b'bef\xf6rdert. ')
654 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
655 b'd\xf9vtipu.. ')
656 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
657 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
658 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
659 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
660 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
661 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
662 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
663 '\u3044\u307e\u3059\u3002')
664 h = Header(g_head, g, header_name='Subject')
665 h.append(cz_head, cz)
666 h.append(utf8_head, utf8)
667 msg = Message()
668 msg['Subject'] = h
669 sfp = StringIO()
670 g = Generator(sfp)
671 g.flatten(msg)
672 eq(sfp.getvalue(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000673Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
674 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
675 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
676 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
677 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
678 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
679 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
680 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
681 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
682 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
683 =?utf-8?b?44CC?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000684
685""")
Guido van Rossum9604e662007-08-30 03:46:43 +0000686 eq(h.encode(maxlinelen=76), """\
687=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
688 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
689 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
690 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
691 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
692 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
693 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
694 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
695 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
696 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
697 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000698
699 def test_long_header_encode(self):
700 eq = self.ndiffAssertEqual
701 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
702 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
703 header_name='X-Foobar-Spoink-Defrobnit')
704 eq(h.encode(), '''\
705wasnipoop; giraffes="very-long-necked-animals";
706 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
707
708 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
709 eq = self.ndiffAssertEqual
710 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
711 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
712 header_name='X-Foobar-Spoink-Defrobnit',
713 continuation_ws='\t')
714 eq(h.encode(), '''\
715wasnipoop; giraffes="very-long-necked-animals";
716 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
717
718 def test_long_header_encode_with_tab_continuation(self):
719 eq = self.ndiffAssertEqual
720 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
721 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
722 header_name='X-Foobar-Spoink-Defrobnit',
723 continuation_ws='\t')
724 eq(h.encode(), '''\
725wasnipoop; giraffes="very-long-necked-animals";
726\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
727
728 def test_header_splitter(self):
729 eq = self.ndiffAssertEqual
730 msg = MIMEText('')
731 # It'd be great if we could use add_header() here, but that doesn't
732 # guarantee an order of the parameters.
733 msg['X-Foobar-Spoink-Defrobnit'] = (
734 'wasnipoop; giraffes="very-long-necked-animals"; '
735 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
736 sfp = StringIO()
737 g = Generator(sfp)
738 g.flatten(msg)
739 eq(sfp.getvalue(), '''\
740Content-Type: text/plain; charset="us-ascii"
741MIME-Version: 1.0
742Content-Transfer-Encoding: 7bit
743X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
744 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
745
746''')
747
748 def test_no_semis_header_splitter(self):
749 eq = self.ndiffAssertEqual
750 msg = Message()
751 msg['From'] = 'test@dom.ain'
752 msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
753 msg.set_payload('Test')
754 sfp = StringIO()
755 g = Generator(sfp)
756 g.flatten(msg)
757 eq(sfp.getvalue(), """\
758From: test@dom.ain
759References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
760 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>
761
762Test""")
763
764 def test_no_split_long_header(self):
765 eq = self.ndiffAssertEqual
766 hstr = 'References: ' + 'x' * 80
Guido van Rossum9604e662007-08-30 03:46:43 +0000767 h = Header(hstr)
768 # These come on two lines because Headers are really field value
769 # classes and don't really know about their field names.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000770 eq(h.encode(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000771References:
772 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
773 h = Header('x' * 80)
774 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000775
776 def test_splitting_multiple_long_lines(self):
777 eq = self.ndiffAssertEqual
778 hstr = """\
779from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
780\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
781\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
782"""
783 h = Header(hstr, continuation_ws='\t')
784 eq(h.encode(), """\
785from babylon.socal-raves.org (localhost [127.0.0.1]);
786 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
787 for <mailman-admin@babylon.socal-raves.org>;
788 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
789\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
790 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
791 for <mailman-admin@babylon.socal-raves.org>;
792 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
793\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
794 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
795 for <mailman-admin@babylon.socal-raves.org>;
796 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")
797
798 def test_splitting_first_line_only_is_long(self):
799 eq = self.ndiffAssertEqual
800 hstr = """\
801from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
802\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
803\tid 17k4h5-00034i-00
804\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
805 h = Header(hstr, maxlinelen=78, header_name='Received',
806 continuation_ws='\t')
807 eq(h.encode(), """\
808from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
809 helo=cthulhu.gerg.ca)
810\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
811\tid 17k4h5-00034i-00
812\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")
813
814 def test_long_8bit_header(self):
815 eq = self.ndiffAssertEqual
816 msg = Message()
817 h = Header('Britische Regierung gibt', 'iso-8859-1',
818 header_name='Subject')
819 h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
Guido van Rossum9604e662007-08-30 03:46:43 +0000820 eq(h.encode(maxlinelen=76), """\
821=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
822 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000823 msg['Subject'] = h
Guido van Rossum9604e662007-08-30 03:46:43 +0000824 eq(msg.as_string(maxheaderlen=76), """\
825Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
826 =?iso-8859-1?q?hore-Windkraftprojekte?=
827
828""")
829 eq(msg.as_string(maxheaderlen=0), """\
830Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000831
832""")
833
834 def test_long_8bit_header_no_charset(self):
835 eq = self.ndiffAssertEqual
836 msg = Message()
Barry Warsaw8c571042007-08-30 19:17:18 +0000837 header_string = ('Britische Regierung gibt gr\xfcnes Licht '
838 'f\xfcr Offshore-Windkraftprojekte '
839 '<a-very-long-address@example.com>')
840 msg['Reply-To'] = header_string
841 self.assertRaises(UnicodeEncodeError, msg.as_string)
842 msg = Message()
843 msg['Reply-To'] = Header(header_string, 'utf-8',
844 header_name='Reply-To')
845 eq(msg.as_string(maxheaderlen=78), """\
846Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
847 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000848
849""")
850
851 def test_long_to_header(self):
852 eq = self.ndiffAssertEqual
853 to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
854 '<someone@eecs.umich.edu>,'
855 '"Someone Test #B" <someone@umich.edu>, '
856 '"Someone Test #C" <someone@eecs.umich.edu>, '
857 '"Someone Test #D" <someone@eecs.umich.edu>')
858 msg = Message()
859 msg['To'] = to
860 eq(msg.as_string(maxheaderlen=78), '''\
Guido van Rossum9604e662007-08-30 03:46:43 +0000861To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
Barry Warsaw70d61ce2009-03-30 23:12:30 +0000862 "Someone Test #B" <someone@umich.edu>,
Guido van Rossum9604e662007-08-30 03:46:43 +0000863 "Someone Test #C" <someone@eecs.umich.edu>,
864 "Someone Test #D" <someone@eecs.umich.edu>
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000865
866''')
867
868 def test_long_line_after_append(self):
869 eq = self.ndiffAssertEqual
870 s = 'This is an example of string which has almost the limit of header length.'
871 h = Header(s)
872 h.append('Add another line.')
Guido van Rossum9604e662007-08-30 03:46:43 +0000873 eq(h.encode(maxlinelen=76), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000874This is an example of string which has almost the limit of header length.
875 Add another line.""")
876
877 def test_shorter_line_with_append(self):
878 eq = self.ndiffAssertEqual
879 s = 'This is a shorter line.'
880 h = Header(s)
881 h.append('Add another sentence. (Surprise?)')
882 eq(h.encode(),
883 'This is a shorter line. Add another sentence. (Surprise?)')
884
885 def test_long_field_name(self):
886 eq = self.ndiffAssertEqual
887 fn = 'X-Very-Very-Very-Long-Header-Name'
Guido van Rossum9604e662007-08-30 03:46:43 +0000888 gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
889 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
890 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
891 'bef\xf6rdert. ')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000892 h = Header(gs, 'iso-8859-1', header_name=fn)
893 # BAW: this seems broken because the first line is too long
Guido van Rossum9604e662007-08-30 03:46:43 +0000894 eq(h.encode(maxlinelen=76), """\
895=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
896 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
897 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
898 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000899
900 def test_long_received_header(self):
901 h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
902 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
903 'Wed, 05 Mar 2003 18:10:18 -0700')
904 msg = Message()
905 msg['Received-1'] = Header(h, continuation_ws='\t')
906 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +0000907 # This should be splitting on spaces not semicolons.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000908 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Barry Warsawbef9d212007-08-31 10:55:37 +0000909Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
910 Wed, 05 Mar 2003 18:10:18 -0700
911Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
912 Wed, 05 Mar 2003 18:10:18 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000913
914""")
915
916 def test_string_headerinst_eq(self):
917 h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
918 'tu-muenchen.de> (David Bremner\'s message of '
919 '"Thu, 6 Mar 2003 13:58:21 +0100")')
920 msg = Message()
921 msg['Received-1'] = Header(h, header_name='Received-1',
922 continuation_ws='\t')
923 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +0000924 # XXX This should be splitting on spaces not commas.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000925 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Barry Warsawbef9d212007-08-31 10:55:37 +0000926Received-1: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner's message of \"Thu,
927 6 Mar 2003 13:58:21 +0100\")
928Received-2: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner's message of \"Thu,
929 6 Mar 2003 13:58:21 +0100\")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000930
931""")
932
933 def test_long_unbreakable_lines_with_continuation(self):
934 eq = self.ndiffAssertEqual
935 msg = Message()
936 t = """\
937iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
938 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
939 msg['Face-1'] = t
940 msg['Face-2'] = Header(t, header_name='Face-2')
Barry Warsawbef9d212007-08-31 10:55:37 +0000941 # XXX This splitting is all wrong. It the first value line should be
942 # snug against the field name.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000943 eq(msg.as_string(maxheaderlen=78), """\
Barry Warsawc5a6a302007-08-31 11:19:21 +0000944Face-1:\x20
Barry Warsaw70d61ce2009-03-30 23:12:30 +0000945 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000946 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Barry Warsawc5a6a302007-08-31 11:19:21 +0000947Face-2:\x20
Barry Warsawbef9d212007-08-31 10:55:37 +0000948 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000949 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
950
951""")
952
953 def test_another_long_multiline_header(self):
954 eq = self.ndiffAssertEqual
955 m = ('Received: from siimage.com '
956 '([172.25.1.3]) by zima.siliconimage.com with '
Guido van Rossum9604e662007-08-30 03:46:43 +0000957 'Microsoft SMTPSVC(5.0.2195.4905); '
958 'Wed, 16 Oct 2002 07:41:11 -0700')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000959 msg = email.message_from_string(m)
960 eq(msg.as_string(maxheaderlen=78), '''\
Barry Warsawbef9d212007-08-31 10:55:37 +0000961Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with Microsoft SMTPSVC(5.0.2195.4905);
962 Wed, 16 Oct 2002 07:41:11 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000963
964''')
965
966 def test_long_lines_with_different_header(self):
967 eq = self.ndiffAssertEqual
968 h = ('List-Unsubscribe: '
969 '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
970 ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
971 '?subject=unsubscribe>')
972 msg = Message()
973 msg['List'] = h
974 msg['List'] = Header(h, header_name='List')
975 eq(msg.as_string(maxheaderlen=78), """\
976List: List-Unsubscribe: <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +0000977 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000978List: List-Unsubscribe: <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +0000979 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000980
981""")
982
R. David Murray6f0022d2011-01-07 21:57:25 +0000983 def test_long_rfc2047_header_with_embedded_fws(self):
984 h = Header(textwrap.dedent("""\
985 We're going to pretend this header is in a non-ascii character set
986 \tto see if line wrapping with encoded words and embedded
987 folding white space works"""),
988 charset='utf-8',
989 header_name='Test')
990 self.assertEqual(h.encode()+'\n', textwrap.dedent("""\
991 =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
992 =?utf-8?q?cter_set?=
993 =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
994 =?utf-8?q?_folding_white_space_works?=""")+'\n')
995
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000996
Ezio Melottib3aedd42010-11-20 19:04:17 +0000997
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000998# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    def setUp(self):
        # The payload's first line starts with "From ", which is what the
        # generator's mangle_from_ option acts on.
        message = Message()
        message['From'] = 'aaa@bbb.org'
        message.set_payload("""\
From the desk of A.A.A.:
Blah blah blah
""")
        self.msg = message

    def _flattened(self, mangle):
        # Serialize self.msg with the given mangle_from_ setting.
        buf = StringIO()
        Generator(buf, mangle_from_=mangle).flatten(self.msg)
        return buf.getvalue()

    def test_mangled_from(self):
        self.assertEqual(self._flattened(True), """\
From: aaa@bbb.org

>From the desk of A.A.A.:
Blah blah blah
""")

    def test_dont_mangle_from(self):
        self.assertEqual(self._flattened(False), """\
From: aaa@bbb.org

From the desk of A.A.A.:
Blah blah blah
""")
1029
1030
Ezio Melottib3aedd42010-11-20 19:04:17 +00001031
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001032# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    def setUp(self):
        # Make sure we pick up the audiotest.au that lives in email/test/data.
        # In Python, there's an audiotest.au living in Lib/test but that isn't
        # included in some binary distros that don't include the test
        # package.  The trailing empty string on the .join() is significant
        # since findfile() will do a dirname().
        datadir = os.path.join(os.path.dirname(landmark), 'data', '')
        with open(findfile('audiotest.au', datadir), 'rb') as fp:
            self._audiodata = fp.read()
        self._au = MIMEAudio(self._audiodata)

    def test_guess_minor_type(self):
        self.assertEqual(self._au.get_content_type(), 'audio/basic')

    def test_encoding(self):
        # The payload is base64 text; decoding it must give back the bytes
        # read from disk.
        payload = self._au.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._audiodata)

    def test_checkSetMinor(self):
        au = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(au.get_content_type(), 'audio/fish')

    def test_add_header(self):
        eq = self.assertEqual
        # assertIs reports both operands on failure, unlike
        # assertTrue(a is b).
        same = self.assertIs
        self._au.add_header('Content-Disposition', 'attachment',
                            filename='audiotest.au')
        eq(self._au['content-disposition'],
           'attachment; filename="audiotest.au"')
        eq(self._au.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'audiotest.au')])
        eq(self._au.get_param('filename', header='content-disposition'),
           'audiotest.au')
        # A fresh list is a sentinel that can only come back if get_param()
        # returned the failobj itself.
        missing = []
        eq(self._au.get_param('attachment', header='content-disposition'), '')
        same(self._au.get_param('foo', failobj=missing,
                                header='content-disposition'), missing)
        # Try some missing stuff
        same(self._au.get_param('foobar', missing), missing)
        same(self._au.get_param('attachment', missing,
                                header='foobar'), missing)
1076
1077
Ezio Melottib3aedd42010-11-20 19:04:17 +00001078
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001079# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    def setUp(self):
        # Load the GIF fixture once per test and wrap it in a MIMEImage.
        with openfile('PyBanner048.gif', 'rb') as fp:
            self._imgdata = fp.read()
        self._im = MIMEImage(self._imgdata)

    def test_guess_minor_type(self):
        self.assertEqual(self._im.get_content_type(), 'image/gif')

    def test_encoding(self):
        # The payload is base64 text; decoding it must give back the bytes
        # read from disk.
        payload = self._im.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._imgdata)

    def test_checkSetMinor(self):
        im = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(im.get_content_type(), 'image/fish')

    def test_add_header(self):
        eq = self.assertEqual
        # assertIs reports both operands on failure, unlike
        # assertTrue(a is b).
        same = self.assertIs
        self._im.add_header('Content-Disposition', 'attachment',
                            filename='dingusfish.gif')
        eq(self._im['content-disposition'],
           'attachment; filename="dingusfish.gif"')
        eq(self._im.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'dingusfish.gif')])
        eq(self._im.get_param('filename', header='content-disposition'),
           'dingusfish.gif')
        # A fresh list is a sentinel that can only come back if get_param()
        # returned the failobj itself.
        missing = []
        eq(self._im.get_param('attachment', header='content-disposition'), '')
        same(self._im.get_param('foo', failobj=missing,
                                header='content-disposition'), missing)
        # Try some missing stuff
        same(self._im.get_param('foobar', missing), missing)
        same(self._im.get_param('attachment', missing,
                                header='foobar'), missing)
1117
1118
Ezio Melottib3aedd42010-11-20 19:04:17 +00001119
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001120# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    def test_headers(self):
        eq = self.assertEqual
        msg = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        eq(msg.get_content_type(), 'application/octet-stream')
        eq(msg['content-transfer-encoding'], 'base64')

    def test_body(self):
        eq = self.assertEqual
        # Named `data` rather than `bytes` so the builtin is not shadowed.
        data = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(data)
        # Line breaks around a base64 body are not significant, so a
        # trailing newline must not fail the comparison.
        eq(msg.get_payload().strip(), '+vv8/f7/')
        eq(msg.get_payload(decode=True), data)
1134
1135
Ezio Melottib3aedd42010-11-20 19:04:17 +00001136
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001137# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    def setUp(self):
        self._msg = MIMEText('hello there')

    def test_types(self):
        eq = self.assertEqual
        eq(self._msg.get_content_type(), 'text/plain')
        eq(self._msg.get_param('charset'), 'us-ascii')
        # A fresh list is a sentinel that can only come back if get_param()
        # returned the failobj itself.
        missing = []
        self.assertIs(self._msg.get_param('foobar', missing), missing)
        self.assertIs(self._msg.get_param('charset', missing, header='foobar'),
                      missing)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there')
        eq(msg.get_charset(), 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # assertIn shows the serialized message when the text is absent.
        self.assertIn('hello there', msg.as_string())

    def test_utf8_input(self):
        # Cyrillic text with an explicit utf-8 charset; the decoded payload
        # must be the utf-8 bytes of the original string.
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        eq = self.assertEqual
        msg = MIMEText(teststr, _charset='utf-8')
        eq(msg.get_charset().output_charset, 'utf-8')
        eq(msg['content-type'], 'text/plain; charset="utf-8"')
        eq(msg.get_payload(decode=True), teststr.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
        "will fix in email6")
    def test_utf8_input_no_charset(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
1188
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001189
Ezio Melottib3aedd42010-11-20 19:04:17 +00001190
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001191# Test complicated multipart/* messages
1192class TestMultipart(TestEmailBase):
1193 def setUp(self):
1194 with openfile('PyBanner048.gif', 'rb') as fp:
1195 data = fp.read()
1196 container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
1197 image = MIMEImage(data, name='dingusfish.gif')
1198 image.add_header('content-disposition', 'attachment',
1199 filename='dingusfish.gif')
1200 intro = MIMEText('''\
1201Hi there,
1202
1203This is the dingus fish.
1204''')
1205 container.attach(intro)
1206 container.attach(image)
1207 container['From'] = 'Barry <barry@digicool.com>'
1208 container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
1209 container['Subject'] = 'Here is your dingus fish'
1210
1211 now = 987809702.54848599
1212 timetuple = time.localtime(now)
1213 if timetuple[-1] == 0:
1214 tzsecs = time.timezone
1215 else:
1216 tzsecs = time.altzone
1217 if tzsecs > 0:
1218 sign = '-'
1219 else:
1220 sign = '+'
1221 tzoffset = ' %s%04d' % (sign, tzsecs / 36)
1222 container['Date'] = time.strftime(
1223 '%a, %d %b %Y %H:%M:%S',
1224 time.localtime(now)) + tzoffset
1225 self._msg = container
1226 self._im = image
1227 self._txt = intro
1228
1229 def test_hierarchy(self):
1230 # convenience
1231 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001232 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001233 raises = self.assertRaises
1234 # tests
1235 m = self._msg
1236 unless(m.is_multipart())
1237 eq(m.get_content_type(), 'multipart/mixed')
1238 eq(len(m.get_payload()), 2)
1239 raises(IndexError, m.get_payload, 2)
1240 m0 = m.get_payload(0)
1241 m1 = m.get_payload(1)
1242 unless(m0 is self._txt)
1243 unless(m1 is self._im)
1244 eq(m.get_payload(), [m0, m1])
1245 unless(not m0.is_multipart())
1246 unless(not m1.is_multipart())
1247
1248 def test_empty_multipart_idempotent(self):
1249 text = """\
1250Content-Type: multipart/mixed; boundary="BOUNDARY"
1251MIME-Version: 1.0
1252Subject: A subject
1253To: aperson@dom.ain
1254From: bperson@dom.ain
1255
1256
1257--BOUNDARY
1258
1259
1260--BOUNDARY--
1261"""
1262 msg = Parser().parsestr(text)
1263 self.ndiffAssertEqual(text, msg.as_string())
1264
1265 def test_no_parts_in_a_multipart_with_none_epilogue(self):
1266 outer = MIMEBase('multipart', 'mixed')
1267 outer['Subject'] = 'A subject'
1268 outer['To'] = 'aperson@dom.ain'
1269 outer['From'] = 'bperson@dom.ain'
1270 outer.set_boundary('BOUNDARY')
1271 self.ndiffAssertEqual(outer.as_string(), '''\
1272Content-Type: multipart/mixed; boundary="BOUNDARY"
1273MIME-Version: 1.0
1274Subject: A subject
1275To: aperson@dom.ain
1276From: bperson@dom.ain
1277
1278--BOUNDARY
1279
1280--BOUNDARY--''')
1281
1282 def test_no_parts_in_a_multipart_with_empty_epilogue(self):
1283 outer = MIMEBase('multipart', 'mixed')
1284 outer['Subject'] = 'A subject'
1285 outer['To'] = 'aperson@dom.ain'
1286 outer['From'] = 'bperson@dom.ain'
1287 outer.preamble = ''
1288 outer.epilogue = ''
1289 outer.set_boundary('BOUNDARY')
1290 self.ndiffAssertEqual(outer.as_string(), '''\
1291Content-Type: multipart/mixed; boundary="BOUNDARY"
1292MIME-Version: 1.0
1293Subject: A subject
1294To: aperson@dom.ain
1295From: bperson@dom.ain
1296
1297
1298--BOUNDARY
1299
1300--BOUNDARY--
1301''')
1302
1303 def test_one_part_in_a_multipart(self):
1304 eq = self.ndiffAssertEqual
1305 outer = MIMEBase('multipart', 'mixed')
1306 outer['Subject'] = 'A subject'
1307 outer['To'] = 'aperson@dom.ain'
1308 outer['From'] = 'bperson@dom.ain'
1309 outer.set_boundary('BOUNDARY')
1310 msg = MIMEText('hello world')
1311 outer.attach(msg)
1312 eq(outer.as_string(), '''\
1313Content-Type: multipart/mixed; boundary="BOUNDARY"
1314MIME-Version: 1.0
1315Subject: A subject
1316To: aperson@dom.ain
1317From: bperson@dom.ain
1318
1319--BOUNDARY
1320Content-Type: text/plain; charset="us-ascii"
1321MIME-Version: 1.0
1322Content-Transfer-Encoding: 7bit
1323
1324hello world
1325--BOUNDARY--''')
1326
1327 def test_seq_parts_in_a_multipart_with_empty_preamble(self):
1328 eq = self.ndiffAssertEqual
1329 outer = MIMEBase('multipart', 'mixed')
1330 outer['Subject'] = 'A subject'
1331 outer['To'] = 'aperson@dom.ain'
1332 outer['From'] = 'bperson@dom.ain'
1333 outer.preamble = ''
1334 msg = MIMEText('hello world')
1335 outer.attach(msg)
1336 outer.set_boundary('BOUNDARY')
1337 eq(outer.as_string(), '''\
1338Content-Type: multipart/mixed; boundary="BOUNDARY"
1339MIME-Version: 1.0
1340Subject: A subject
1341To: aperson@dom.ain
1342From: bperson@dom.ain
1343
1344
1345--BOUNDARY
1346Content-Type: text/plain; charset="us-ascii"
1347MIME-Version: 1.0
1348Content-Transfer-Encoding: 7bit
1349
1350hello world
1351--BOUNDARY--''')
1352
1353
1354 def test_seq_parts_in_a_multipart_with_none_preamble(self):
1355 eq = self.ndiffAssertEqual
1356 outer = MIMEBase('multipart', 'mixed')
1357 outer['Subject'] = 'A subject'
1358 outer['To'] = 'aperson@dom.ain'
1359 outer['From'] = 'bperson@dom.ain'
1360 outer.preamble = None
1361 msg = MIMEText('hello world')
1362 outer.attach(msg)
1363 outer.set_boundary('BOUNDARY')
1364 eq(outer.as_string(), '''\
1365Content-Type: multipart/mixed; boundary="BOUNDARY"
1366MIME-Version: 1.0
1367Subject: A subject
1368To: aperson@dom.ain
1369From: bperson@dom.ain
1370
1371--BOUNDARY
1372Content-Type: text/plain; charset="us-ascii"
1373MIME-Version: 1.0
1374Content-Transfer-Encoding: 7bit
1375
1376hello world
1377--BOUNDARY--''')
1378
1379
1380 def test_seq_parts_in_a_multipart_with_none_epilogue(self):
1381 eq = self.ndiffAssertEqual
1382 outer = MIMEBase('multipart', 'mixed')
1383 outer['Subject'] = 'A subject'
1384 outer['To'] = 'aperson@dom.ain'
1385 outer['From'] = 'bperson@dom.ain'
1386 outer.epilogue = None
1387 msg = MIMEText('hello world')
1388 outer.attach(msg)
1389 outer.set_boundary('BOUNDARY')
1390 eq(outer.as_string(), '''\
1391Content-Type: multipart/mixed; boundary="BOUNDARY"
1392MIME-Version: 1.0
1393Subject: A subject
1394To: aperson@dom.ain
1395From: bperson@dom.ain
1396
1397--BOUNDARY
1398Content-Type: text/plain; charset="us-ascii"
1399MIME-Version: 1.0
1400Content-Transfer-Encoding: 7bit
1401
1402hello world
1403--BOUNDARY--''')
1404
1405
1406 def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
1407 eq = self.ndiffAssertEqual
1408 outer = MIMEBase('multipart', 'mixed')
1409 outer['Subject'] = 'A subject'
1410 outer['To'] = 'aperson@dom.ain'
1411 outer['From'] = 'bperson@dom.ain'
1412 outer.epilogue = ''
1413 msg = MIMEText('hello world')
1414 outer.attach(msg)
1415 outer.set_boundary('BOUNDARY')
1416 eq(outer.as_string(), '''\
1417Content-Type: multipart/mixed; boundary="BOUNDARY"
1418MIME-Version: 1.0
1419Subject: A subject
1420To: aperson@dom.ain
1421From: bperson@dom.ain
1422
1423--BOUNDARY
1424Content-Type: text/plain; charset="us-ascii"
1425MIME-Version: 1.0
1426Content-Transfer-Encoding: 7bit
1427
1428hello world
1429--BOUNDARY--
1430''')
1431
1432
1433 def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
1434 eq = self.ndiffAssertEqual
1435 outer = MIMEBase('multipart', 'mixed')
1436 outer['Subject'] = 'A subject'
1437 outer['To'] = 'aperson@dom.ain'
1438 outer['From'] = 'bperson@dom.ain'
1439 outer.epilogue = '\n'
1440 msg = MIMEText('hello world')
1441 outer.attach(msg)
1442 outer.set_boundary('BOUNDARY')
1443 eq(outer.as_string(), '''\
1444Content-Type: multipart/mixed; boundary="BOUNDARY"
1445MIME-Version: 1.0
1446Subject: A subject
1447To: aperson@dom.ain
1448From: bperson@dom.ain
1449
1450--BOUNDARY
1451Content-Type: text/plain; charset="us-ascii"
1452MIME-Version: 1.0
1453Content-Transfer-Encoding: 7bit
1454
1455hello world
1456--BOUNDARY--
1457
1458''')
1459
1460 def test_message_external_body(self):
1461 eq = self.assertEqual
1462 msg = self._msgobj('msg_36.txt')
1463 eq(len(msg.get_payload()), 2)
1464 msg1 = msg.get_payload(1)
1465 eq(msg1.get_content_type(), 'multipart/alternative')
1466 eq(len(msg1.get_payload()), 2)
1467 for subpart in msg1.get_payload():
1468 eq(subpart.get_content_type(), 'message/external-body')
1469 eq(len(subpart.get_payload()), 1)
1470 subsubpart = subpart.get_payload(0)
1471 eq(subsubpart.get_content_type(), 'text/plain')
1472
1473 def test_double_boundary(self):
1474 # msg_37.txt is a multipart that contains two dash-boundary's in a
1475 # row. Our interpretation of RFC 2046 calls for ignoring the second
1476 # and subsequent boundaries.
1477 msg = self._msgobj('msg_37.txt')
1478 self.assertEqual(len(msg.get_payload()), 3)
1479
1480 def test_nested_inner_contains_outer_boundary(self):
1481 eq = self.ndiffAssertEqual
1482 # msg_38.txt has an inner part that contains outer boundaries. My
1483 # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
1484 # these are illegal and should be interpreted as unterminated inner
1485 # parts.
1486 msg = self._msgobj('msg_38.txt')
1487 sfp = StringIO()
1488 iterators._structure(msg, sfp)
1489 eq(sfp.getvalue(), """\
1490multipart/mixed
1491 multipart/mixed
1492 multipart/alternative
1493 text/plain
1494 text/plain
1495 text/plain
1496 text/plain
1497""")
1498
1499 def test_nested_with_same_boundary(self):
1500 eq = self.ndiffAssertEqual
1501 # msg 39.txt is similarly evil in that it's got inner parts that use
1502 # the same boundary as outer parts. Again, I believe the way this is
1503 # parsed is closest to the spirit of RFC 2046
1504 msg = self._msgobj('msg_39.txt')
1505 sfp = StringIO()
1506 iterators._structure(msg, sfp)
1507 eq(sfp.getvalue(), """\
1508multipart/mixed
1509 multipart/mixed
1510 multipart/alternative
1511 application/octet-stream
1512 application/octet-stream
1513 text/plain
1514""")
1515
1516 def test_boundary_in_non_multipart(self):
1517 msg = self._msgobj('msg_40.txt')
1518 self.assertEqual(msg.as_string(), '''\
1519MIME-Version: 1.0
1520Content-Type: text/html; boundary="--961284236552522269"
1521
1522----961284236552522269
1523Content-Type: text/html;
1524Content-Transfer-Encoding: 7Bit
1525
1526<html></html>
1527
1528----961284236552522269--
1529''')
1530
1531 def test_boundary_with_leading_space(self):
1532 eq = self.assertEqual
1533 msg = email.message_from_string('''\
1534MIME-Version: 1.0
1535Content-Type: multipart/mixed; boundary=" XXXX"
1536
1537-- XXXX
1538Content-Type: text/plain
1539
1540
1541-- XXXX
1542Content-Type: text/plain
1543
1544-- XXXX--
1545''')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001546 self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001547 eq(msg.get_boundary(), ' XXXX')
1548 eq(len(msg.get_payload()), 2)
1549
1550 def test_boundary_without_trailing_newline(self):
1551 m = Parser().parsestr("""\
1552Content-Type: multipart/mixed; boundary="===============0012394164=="
1553MIME-Version: 1.0
1554
1555--===============0012394164==
1556Content-Type: image/file1.jpg
1557MIME-Version: 1.0
1558Content-Transfer-Encoding: base64
1559
1560YXNkZg==
1561--===============0012394164==--""")
Ezio Melottib3aedd42010-11-20 19:04:17 +00001562 self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001563
1564
Ezio Melottib3aedd42010-11-20 19:04:17 +00001565
# Test some badly formatted messages
class TestNonConformant(TestEmailBase):
    def _assert_missing_boundary_defects(self, msg):
        # Shared check: exactly two defects were recorded, a
        # NoBoundaryInMultipartDefect followed by a
        # MultipartInvariantViolationDefect.
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    def test_parse_missing_minor_type(self):
        # msg_14.txt must be reported as text/plain by all three accessors.
        eq = self.assertEqual
        msg = self._msgobj('msg_14.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')

    def test_same_boundary_inner_outer(self):
        msg = self._msgobj('msg_15.txt')
        # XXX We can probably eventually do better
        inner = msg.get_payload(0)
        self.assertTrue(hasattr(inner, 'defects'))
        self.assertEqual(len(inner.defects), 1)
        self.assertIsInstance(inner.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    def test_multipart_no_boundary(self):
        msg = self._msgobj('msg_25.txt')
        # The payload is expected to stay a plain string rather than a list
        # of sub-parts.
        self.assertIsInstance(msg.get_payload(), str)
        self._assert_missing_boundary_defects(msg)

    def test_invalid_content_type(self):
        eq = self.assertEqual
        msg = Message()
        # RFC 2045, $5.2 says invalid yields text/plain
        msg['Content-Type'] = 'text'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Clear the old value and try something /really/ invalid
        del msg['content-type']
        msg['Content-Type'] = 'foo'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Still, make sure that the message is idempotently generated
        s = StringIO()
        Generator(s).flatten(msg)
        self.ndiffAssertEqual(s.getvalue(), 'Content-Type: foo\n\n')

    def test_no_start_boundary(self):
        # The whole body, delimiter lines included, is expected back as a
        # single string payload.
        msg = self._msgobj('msg_31.txt')
        self.ndiffAssertEqual(msg.get_payload(), """\
--BOUNDARY
Content-Type: text/plain

message 1

--BOUNDARY
Content-Type: text/plain

message 2

--BOUNDARY--
""")

    def test_no_separating_blank_line(self):
        # The expected output has a blank line between the headers and the
        # body text.
        msg = self._msgobj('msg_35.txt')
        self.ndiffAssertEqual(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: here's something interesting

counter to RFC 2822, there's no separating newline here
""")

    def test_lying_multipart(self):
        msg = self._msgobj('msg_41.txt')
        self.assertTrue(hasattr(msg, 'defects'))
        self._assert_missing_boundary_defects(msg)

    def test_missing_start_boundary(self):
        outer = self._msgobj('msg_42.txt')
        # The message structure is:
        #
        # multipart/mixed
        #    text/plain
        #    message/rfc822
        #        multipart/mixed [*]
        #
        # [*] This message is missing its start boundary
        bad = outer.get_payload(1).get_payload(0)
        self.assertEqual(len(bad.defects), 1)
        self.assertIsInstance(bad.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    def test_first_line_is_continuation_header(self):
        eq = self.assertEqual
        # The very first line starts with whitespace, so it looks like a
        # header continuation with no header before it.
        m = ' Line 1\nLine 2\nLine 3'
        msg = email.message_from_string(m)
        eq(msg.keys(), [])
        eq(msg.get_payload(), 'Line 2\nLine 3')
        eq(len(msg.defects), 1)
        self.assertIsInstance(msg.defects[0],
                              errors.FirstHeaderLineIsContinuationDefect)
        # The defect records the offending line verbatim.
        eq(msg.defects[0].line, ' Line 1\n')
1677
1678
Ezio Melottib3aedd42010-11-20 19:04:17 +00001679
# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    def test_rfc2047_multiline(self):
        eq = self.assertEqual
        # A folded header: the second physical line is a continuation.
        s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
        dh = decode_header(s)
        eq(dh, [
            (b'Re:', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            (b'baz foo bar', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
        header = make_header(dh)
        eq(str(header),
           'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""")

    def test_whitespace_eater_unicode(self):
        eq = self.assertEqual
        s = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        dh = decode_header(s)
        # The charset name comes back lower-cased.
        eq(dh, [(b'Andr\xe9', 'iso-8859-1'),
                (b'Pirard <pirard@dom.ain>', None)])
        header = str(make_header(dh))
        eq(header, 'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_eater_unicode_2(self):
        eq = self.assertEqual
        s = ('The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the '
             '=?iso-8859-1?b?bGF6eSBkb2c=?=')
        dh = decode_header(s)
        eq(dh, [(b'The', None), (b'quick brown fox', 'iso-8859-1'),
                (b'jumped over the', None), (b'lazy dog', 'iso-8859-1')])
        hu = str(make_header(dh))
        eq(hu, 'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        # Encoded words that are not separated from the surrounding text by
        # whitespace are not decoded: the input comes back unchanged.
        s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        dh = decode_header(s)
        self.assertEqual(dh, [(s, None)])

    def test_rfc2047_with_whitespace(self):
        s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        dh = decode_header(s)
        self.assertEqual(dh, [(b'Sm', None), (b'\xf6', 'iso-8859-1'),
                              (b'rg', None), (b'\xe5', 'iso-8859-1'),
                              (b'sbord', None)])

    def test_rfc2047_B_bad_padding(self):
        s = '=?iso-8859-1?B?%s?='
        data = [                                # only test complete bytes
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi'),
        ]
        for q, a in data:
            dh = decode_header(s % q)
            self.assertEqual(dh, [(a, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004.  '=zz' is not a valid hex escape and is expected to
        # be passed through literally.  The charset used to be spelled
        # 'iso-8659-1', a typo for iso-8859-1.
        s = '=?iso-8859-1?Q?andr=e9=zz?='
        self.assertEqual(decode_header(s),
                         [(b'andr\xe9=zz', 'iso-8859-1')])
1744
Ezio Melottib3aedd42010-11-20 19:04:17 +00001745
# Test the MIMEMessage class
class TestMIMEMessage(TestEmailBase):
    def setUp(self):
        with openfile('msg_11.txt') as fp:
            self._text = fp.read()

    def _two_part_message(self, epilogue):
        # Build the multipart/mixed message shared by test_epilogue and
        # test_no_nl_preamble; only the epilogue differs between them.
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = epilogue
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(MIMEText('One'))
        msg.attach(MIMEText('Two'))
        return msg

    def _check_digest_default_types(self, filename):
        # Parse *filename* and check that its first two sub-parts default to
        # (and report) message/rfc822, and that the message each of them
        # wraps defaults to (and reports) text/plain.
        eq = self.assertEqual
        with openfile(filename) as fp:
            msg = email.message_from_file(fp)
        for container in (msg.get_payload(0), msg.get_payload(1)):
            eq(container.get_default_type(), 'message/rfc822')
            eq(container.get_content_type(), 'message/rfc822')
            inner = container.get_payload(0)
            eq(inner.get_default_type(), 'text/plain')
            eq(inner.get_content_type(), 'text/plain')

    def test_type_error(self):
        # MIMEMessage must be given a Message, not a string.
        self.assertRaises(TypeError, MIMEMessage, 'a plain string')

    def test_valid_argument(self):
        eq = self.assertEqual
        subject = 'A sub-message'
        m = Message()
        m['Subject'] = subject
        r = MIMEMessage(m)
        eq(r.get_content_type(), 'message/rfc822')
        payload = r.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subpart = payload[0]
        # The wrapped message is stored by reference, not copied.
        self.assertIs(subpart, m)
        eq(subpart['subject'], subject)

    def test_bad_multipart(self):
        msg1 = Message()
        msg1['Subject'] = 'subpart 1'
        msg2 = Message()
        msg2['Subject'] = 'subpart 2'
        r = MIMEMessage(msg1)
        # Attaching a second message to a message/rfc822 must be rejected.
        self.assertRaises(errors.MultipartConversionError, r.attach, msg2)

    def test_generate(self):
        # First craft the message to be encapsulated
        m = Message()
        m['Subject'] = 'An enclosed message'
        m.set_payload('Here is the body of the message.\n')
        r = MIMEMessage(m)
        r['Subject'] = 'The enclosing message'
        s = StringIO()
        Generator(s).flatten(r)
        self.assertEqual(s.getvalue(), """\
Content-Type: message/rfc822
MIME-Version: 1.0
Subject: The enclosing message

Subject: An enclosed message

Here is the body of the message.
""")

    def test_parse_message_rfc822(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_11.txt')
        eq(msg.get_content_type(), 'message/rfc822')
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        submsg = payload[0]
        self.assertIsInstance(submsg, Message)
        eq(submsg['subject'], 'An enclosed message')
        eq(submsg.get_payload(), 'Here is the body of the message.\n')

    def test_dsn(self):
        eq = self.assertEqual
        # msg 16 is a Delivery Status Notification, see RFC 1894
        msg = self._msgobj('msg_16.txt')
        eq(msg.get_content_type(), 'multipart/report')
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 3)
        # Subpart 1 is a text/plain, human readable section
        subpart = msg.get_payload(0)
        eq(subpart.get_content_type(), 'text/plain')
        # NOTE(review): the two-space indent of the quoted header lines
        # below is reconstructed -- confirm against msg_16.txt.
        eq(subpart.get_payload(), """\
This report relates to a message you sent with the following header fields:

  Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
  Date: Sun, 23 Sep 2001 20:10:55 -0700
  From: "Ian T. Henry" <henryi@oxy.edu>
  To: SoCal Raves <scr@socal-raves.org>
  Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

  Recipient address: jangel1@cougar.noc.ucla.edu
  Reason: recipient reached disk quota

""")
        # Subpart 2 contains the machine parsable DSN information.  It
        # consists of two blocks of headers, represented by two nested Message
        # objects.
        subpart = msg.get_payload(1)
        eq(subpart.get_content_type(), 'message/delivery-status')
        eq(len(subpart.get_payload()), 2)
        # message/delivery-status should treat each block as a bunch of
        # headers, i.e. a bunch of Message objects.
        dsn1 = subpart.get_payload(0)
        self.assertIsInstance(dsn1, Message)
        eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
        eq(dsn1.get_param('dns', header='reporting-mta'), '')
        # Try a missing one <wink>
        eq(dsn1.get_param('nsd', header='reporting-mta'), None)
        dsn2 = subpart.get_payload(1)
        self.assertIsInstance(dsn2, Message)
        eq(dsn2['action'], 'failed')
        eq(dsn2.get_params(header='original-recipient'),
           [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
        eq(dsn2.get_param('rfc822', header='final-recipient'), '')
        # Subpart 3 is the original message
        subpart = msg.get_payload(2)
        eq(subpart.get_content_type(), 'message/rfc822')
        payload = subpart.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subsubpart = payload[0]
        self.assertIsInstance(subsubpart, Message)
        eq(subsubpart.get_content_type(), 'text/plain')
        eq(subsubpart['message-id'],
           '<002001c144a6$8752e060$56104586@oxy.edu>')

    def test_epilogue(self):
        # A hand-built message with preamble and epilogue must flatten to
        # exactly the text stored in msg_21.txt.
        with openfile('msg_21.txt') as fp:
            text = fp.read()
        msg = self._two_part_message('End of MIME message\n')
        sfp = StringIO()
        Generator(sfp).flatten(msg)
        self.ndiffAssertEqual(sfp.getvalue(), text)

    def test_no_nl_preamble(self):
        # Same message as test_epilogue, but with an empty epilogue.
        msg = self._two_part_message('')
        self.ndiffAssertEqual(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
""")

    def test_default_type(self):
        self._check_digest_default_types('msg_30.txt')

    def test_default_type_with_explicit_container_type(self):
        self._check_digest_default_types('msg_28.txt')

    def test_default_type_non_parsed(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # Set up container
        container = MIMEMultipart('digest', 'BOUNDARY')
        container.epilogue = ''
        # Set up subparts
        subpart1 = MIMEMessage(MIMEText('message 1\n'))
        subpart2 = MIMEMessage(MIMEText('message 2\n'))
        container.attach(subpart1)
        container.attach(subpart2)
        for subpart in (subpart1, subpart2):
            eq(subpart.get_content_type(), 'message/rfc822')
            eq(subpart.get_default_type(), 'message/rfc822')
        neq(container.as_string(unixfrom=False), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
        # With the explicit headers removed, the sub-parts must still report
        # message/rfc822 as both their content type and their default type.
        for subpart in (subpart1, subpart2):
            del subpart['content-type']
            del subpart['mime-version']
        for subpart in (subpart1, subpart2):
            eq(subpart.get_content_type(), 'message/rfc822')
            eq(subpart.get_default_type(), 'message/rfc822')
        neq(container.as_string(unixfrom=False), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')

    def test_mime_attachments_in_constructor(self):
        eq = self.assertEqual
        text1 = MIMEText('')
        text2 = MIMEText('')
        msg = MIMEMultipart(_subparts=(text1, text2))
        eq(len(msg.get_payload()), 2)
        eq(msg.get_payload(0), text1)
        eq(msg.get_payload(1), text2)

    def test_default_multipart_constructor(self):
        # A MIMEMultipart built with no arguments is already multipart.
        msg = MIMEMultipart()
        self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002045
Ezio Melottib3aedd42010-11-20 19:04:17 +00002046
# A general test of parser->model->generator idempotency.  IOW, read a message
# in, parse it into a message object tree, then without touching the tree,
# regenerate the plain text.  The original text and the transformed text
# should be identical.  Note that we ignore the Unix-From since that may
# contain a changed date.
class TestIdempotent(TestEmailBase):

    # Line separator expected at the end of parsed payloads/preambles.
    linesep = '\n'

    def _msgobj(self, filename):
        # Return (parsed message, raw text) for the named data file.
        with openfile(filename) as fp:
            data = fp.read()
        msg = email.message_from_string(data)
        return msg, data

    def _idempotent(self, msg, text, unixfrom=False):
        # Flatten *msg* without header wrapping (maxheaderlen=0) and require
        # the result to equal *text*.
        s = StringIO()
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg, unixfrom=unixfrom)
        self.ndiffAssertEqual(text, s.getvalue())

    def test_parse_text_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_01.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_params()[1], ('charset', 'us-ascii'))
        eq(msg.get_param('charset'), 'us-ascii')
        eq(msg.preamble, None)
        eq(msg.epilogue, None)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_03.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_params(), None)
        eq(msg.get_param('charset'), None)
        self._idempotent(msg, text)

    def test_simple_multipart(self):
        self._idempotent(*self._msgobj('msg_04.txt'))

    def test_MIME_digest(self):
        self._idempotent(*self._msgobj('msg_02.txt'))

    def test_long_header(self):
        self._idempotent(*self._msgobj('msg_27.txt'))

    def test_MIME_digest_with_part_headers(self):
        self._idempotent(*self._msgobj('msg_28.txt'))

    def test_mixed_with_image(self):
        self._idempotent(*self._msgobj('msg_06.txt'))

    def test_multipart_report(self):
        self._idempotent(*self._msgobj('msg_05.txt'))

    def test_dsn(self):
        self._idempotent(*self._msgobj('msg_16.txt'))

    def test_preamble_epilogue(self):
        self._idempotent(*self._msgobj('msg_21.txt'))

    def test_multipart_one_part(self):
        self._idempotent(*self._msgobj('msg_23.txt'))

    def test_multipart_no_parts(self):
        self._idempotent(*self._msgobj('msg_24.txt'))

    def test_no_start_boundary(self):
        self._idempotent(*self._msgobj('msg_31.txt'))

    def test_rfc2231_charset(self):
        self._idempotent(*self._msgobj('msg_32.txt'))

    def test_more_rfc2231_parameters(self):
        self._idempotent(*self._msgobj('msg_33.txt'))

    def test_text_plain_in_a_multipart_digest(self):
        self._idempotent(*self._msgobj('msg_34.txt'))

    def test_nested_multipart_mixeds(self):
        self._idempotent(*self._msgobj('msg_12a.txt'))

    def test_message_external_body_idempotent(self):
        self._idempotent(*self._msgobj('msg_36.txt'))

    def test_message_delivery_status(self):
        # This is the one case that also regenerates the Unix-From line.
        msg, text = self._msgobj('msg_43.txt')
        self._idempotent(msg, text, unixfrom=True)

    def test_message_signed_idempotent(self):
        self._idempotent(*self._msgobj('msg_45.txt'))

    def test_content_type(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_05.txt')
        eq(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(msg.get_params())
        eq(params['report-type'], 'delivery-status')
        eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep)
        eq(msg.epilogue, self.linesep)
        eq(len(msg.get_payload()), 3)
        # Make sure the subparts are what we expect
        msg1 = msg.get_payload(0)
        eq(msg1.get_content_type(), 'text/plain')
        eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg2 = msg.get_payload(1)
        eq(msg2.get_content_type(), 'text/plain')
        eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg3 = msg.get_payload(2)
        eq(msg3.get_content_type(), 'message/rfc822')
        self.assertIsInstance(msg3, Message)
        payload = msg3.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg4 = payload[0]
        self.assertIsInstance(msg4, Message)
        eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep)

    def test_parser(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        eq(msg.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and that
        # that submessage has a type of text/plain
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg1 = payload[0]
        self.assertIsInstance(msg1, Message)
        eq(msg1.get_content_type(), 'text/plain')
        self.assertIsInstance(msg1.get_payload(), str)
        eq(msg1.get_payload(), self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002209
2210
Ezio Melottib3aedd42010-11-20 19:04:17 +00002211
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002212# Test various other bits of the package's functionality
2213class TestMiscellaneous(TestEmailBase):
2214 def test_message_from_string(self):
2215 with openfile('msg_01.txt') as fp:
2216 text = fp.read()
2217 msg = email.message_from_string(text)
2218 s = StringIO()
2219 # Don't wrap/continue long headers since we're trying to test
2220 # idempotency.
2221 g = Generator(s, maxheaderlen=0)
2222 g.flatten(msg)
2223 self.assertEqual(text, s.getvalue())
2224
2225 def test_message_from_file(self):
2226 with openfile('msg_01.txt') as fp:
2227 text = fp.read()
2228 fp.seek(0)
2229 msg = email.message_from_file(fp)
2230 s = StringIO()
2231 # Don't wrap/continue long headers since we're trying to test
2232 # idempotency.
2233 g = Generator(s, maxheaderlen=0)
2234 g.flatten(msg)
2235 self.assertEqual(text, s.getvalue())
2236
2237 def test_message_from_string_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002238 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002239 with openfile('msg_01.txt') as fp:
2240 text = fp.read()
2241
2242 # Create a subclass
2243 class MyMessage(Message):
2244 pass
2245
2246 msg = email.message_from_string(text, MyMessage)
2247 unless(isinstance(msg, MyMessage))
2248 # Try something more complicated
2249 with openfile('msg_02.txt') as fp:
2250 text = fp.read()
2251 msg = email.message_from_string(text, MyMessage)
2252 for subpart in msg.walk():
2253 unless(isinstance(subpart, MyMessage))
2254
2255 def test_message_from_file_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002256 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002257 # Create a subclass
2258 class MyMessage(Message):
2259 pass
2260
2261 with openfile('msg_01.txt') as fp:
2262 msg = email.message_from_file(fp, MyMessage)
2263 unless(isinstance(msg, MyMessage))
2264 # Try something more complicated
2265 with openfile('msg_02.txt') as fp:
2266 msg = email.message_from_file(fp, MyMessage)
2267 for subpart in msg.walk():
2268 unless(isinstance(subpart, MyMessage))
2269
2270 def test__all__(self):
2271 module = __import__('email')
2272 # Can't use sorted() here due to Python 2.3 compatibility
2273 all = module.__all__[:]
2274 all.sort()
2275 self.assertEqual(all, [
2276 'base64mime', 'charset', 'encoders', 'errors', 'generator',
R. David Murray96fd54e2010-10-08 15:55:28 +00002277 'header', 'iterators', 'message', 'message_from_binary_file',
2278 'message_from_bytes', 'message_from_file',
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002279 'message_from_string', 'mime', 'parser',
2280 'quoprimime', 'utils',
2281 ])
2282
2283 def test_formatdate(self):
2284 now = time.time()
2285 self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
2286 time.gmtime(now)[:6])
2287
2288 def test_formatdate_localtime(self):
2289 now = time.time()
2290 self.assertEqual(
2291 utils.parsedate(utils.formatdate(now, localtime=True))[:6],
2292 time.localtime(now)[:6])
2293
2294 def test_formatdate_usegmt(self):
2295 now = time.time()
2296 self.assertEqual(
2297 utils.formatdate(now, localtime=False),
2298 time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
2299 self.assertEqual(
2300 utils.formatdate(now, localtime=False, usegmt=True),
2301 time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
2302
2303 def test_parsedate_none(self):
2304 self.assertEqual(utils.parsedate(''), None)
2305
2306 def test_parsedate_compact(self):
2307 # The FWS after the comma is optional
2308 self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
2309 utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
2310
2311 def test_parsedate_no_dayofweek(self):
2312 eq = self.assertEqual
2313 eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
2314 (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
2315
2316 def test_parsedate_compact_no_dayofweek(self):
2317 eq = self.assertEqual
2318 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
2319 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2320
R. David Murray4a62e892010-12-23 20:35:46 +00002321 def test_parsedate_no_space_before_positive_offset(self):
2322 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
2323 (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
2324
2325 def test_parsedate_no_space_before_negative_offset(self):
2326 # Issue 1155362: we already handled '+' for this case.
2327 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
2328 (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))
2329
2330
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002331 def test_parsedate_acceptable_to_time_functions(self):
2332 eq = self.assertEqual
2333 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
2334 t = int(time.mktime(timetup))
2335 eq(time.localtime(t)[:6], timetup[:6])
2336 eq(int(time.strftime('%Y', timetup)), 2003)
2337 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
2338 t = int(time.mktime(timetup[:9]))
2339 eq(time.localtime(t)[:6], timetup[:6])
2340 eq(int(time.strftime('%Y', timetup[:9])), 2003)
2341
R. David Murray219d1c82010-08-25 00:45:55 +00002342 def test_parsedate_y2k(self):
2343 """Test for parsing a date with a two-digit year.
2344
2345 Parsing a date with a two-digit year should return the correct
2346 four-digit year. RFC822 allows two-digit years, but RFC2822 (which
2347 obsoletes RFC822) requires four-digit years.
2348
2349 """
2350 self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
2351 utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
2352 self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
2353 utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
2354
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002355 def test_parseaddr_empty(self):
2356 self.assertEqual(utils.parseaddr('<>'), ('', ''))
2357 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
2358
2359 def test_noquote_dump(self):
2360 self.assertEqual(
2361 utils.formataddr(('A Silly Person', 'person@dom.ain')),
2362 'A Silly Person <person@dom.ain>')
2363
2364 def test_escape_dump(self):
2365 self.assertEqual(
2366 utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
2367 r'"A \(Very\) Silly Person" <person@dom.ain>')
2368 a = r'A \(Special\) Person'
2369 b = 'person@dom.ain'
2370 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2371
2372 def test_escape_backslashes(self):
2373 self.assertEqual(
2374 utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
2375 r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
2376 a = r'Arthur \Backslash\ Foobar'
2377 b = 'person@dom.ain'
2378 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2379
2380 def test_name_with_dot(self):
2381 x = 'John X. Doe <jxd@example.com>'
2382 y = '"John X. Doe" <jxd@example.com>'
2383 a, b = ('John X. Doe', 'jxd@example.com')
2384 self.assertEqual(utils.parseaddr(x), (a, b))
2385 self.assertEqual(utils.parseaddr(y), (a, b))
2386 # formataddr() quotes the name if there's a dot in it
2387 self.assertEqual(utils.formataddr((a, b)), y)
2388
def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
    # issue 10005.  Each address must come back from parseaddr()
    # exactly as it went in, with an empty display name.
    #
    # Note that in the third address the second pair of backslashes is
    # not actually a quoted pair because it is not inside a comment or
    # quoted string: the address has a quoted string containing a quoted
    # backslash, followed by 'example' and two backslashes, followed by
    # another quoted string containing a space and the word 'example'.
    # parseaddr copies those two backslashes literally.  Per rfc5322
    # this is not technically correct since a \ may not appear in an
    # address outside of a quoted string.  It is probably a sensible
    # Postel interpretation, though.
    addresses = (
        '""example" example"@example.com',
        '"\\"example\\" example"@example.com',
        '"\\\\"example\\\\" example"@example.com',
    )
    for address in addresses:
        self.assertEqual(utils.parseaddr(address), ('', address))
2406
def test_parseaddr_preserves_spaces_in_local_part(self):
    # issue 9286.  A normal RFC5322 local part should not contain any
    # folding white space, but legacy local parts can (they are a
    # sequence of atoms, not dotatoms).  On the other hand we strip
    # whitespace from before the @ and around dots, on the assumption
    # that the whitespace around the punctuation is a mistake in what
    # would otherwise be an RFC5322 local part.  Leading whitespace is,
    # as usual, stripped as well.
    #
    # NOTE(review): the first two cases are textually identical in the
    # copy this was taken from; runs of spaces may have been collapsed
    # there -- confirm the intended inputs before relying on them.
    cases = (
        ("merwok wok@xample.com", "merwok wok@xample.com"),
        ("merwok wok@xample.com", "merwok wok@xample.com"),
        ("merwok wok@xample.com", " merwok wok @xample.com"),
        ('merwok"wok" wok@xample.com', 'merwok"wok" wok@xample.com'),
        ('merwok.wok.wok@xample.com', 'merwok. wok . wok@xample.com'),
    )
    for expected_addr, raw in cases:
        self.assertEqual(('', expected_addr), utils.parseaddr(raw))
2424
def test_multiline_from_comment(self):
    """A display name folded over two lines is joined with a space."""
    folded = 'Foo\n\tBar <foo@example.com>'
    self.assertEqual(
        utils.parseaddr(folded), ('Foo Bar', 'foo@example.com'))
2430
def test_quote_dump(self):
    """A display name containing ';' is emitted inside double quotes."""
    formatted = utils.formataddr(('A Silly; Person', 'person@dom.ain'))
    self.assertEqual(formatted, r'"A Silly; Person" <person@dom.ain>')
2435
def test_charset_richcomparisons(self):
    """Charset compares equal to its name in any case, in both orders."""
    cset1 = Charset()
    cset2 = Charset()
    same_names = ('us-ascii', 'US-ASCII', 'Us-AsCiI')
    other_names = ('usascii', 'USASCII', 'UsAsCiI')
    # Equality, with the Charset on the left and then on the right.
    for name in same_names:
        self.assertEqual(cset1, name)
    for name in same_names:
        self.assertEqual(name, cset1)
    # Inequality, again in both operand orders.
    for name in other_names:
        self.assertNotEqual(cset1, name)
    for name in other_names:
        self.assertNotEqual(name, cset1)
    # Two default-constructed charsets are equal to each other.
    self.assertEqual(cset1, cset2)
    self.assertEqual(cset2, cset1)
2455
def test_getaddresses(self):
    """Both 'addr (Name)' and 'Name <addr>' forms are understood."""
    fields = ['aperson@dom.ain (Al Person)',
              'Bud Person <bperson@dom.ain>']
    expected = [('Al Person', 'aperson@dom.ain'),
                ('Bud Person', 'bperson@dom.ain')]
    self.assertEqual(utils.getaddresses(fields), expected)
2462
def test_getaddresses_nasty(self):
    """Empty groups and junk input still produce (name, addr) pairs."""
    empty = ('', '')
    self.assertEqual(utils.getaddresses(['foo: ;']), [empty])
    self.assertEqual(
        utils.getaddresses(['[]*-- =~$']),
        [empty, empty, ('', '*--')])
    self.assertEqual(
        utils.getaddresses(
            ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
        [empty, ('Jason R. Mastaler', 'jason@dom.ain')])
2472
def test_getaddresses_embedded_comment(self):
    """Test proper handling of a nested comment"""
    parsed = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
    first_name, first_addr = parsed[0]
    self.assertEqual(first_addr, 'foo@bar.com')
2478
def test_utils_quote_unquote(self):
    """A filename with a backslash and a quote round-trips via a header."""
    filename = 'foo\\wacky"name'
    msg = Message()
    msg.add_header('content-disposition', 'attachment', filename=filename)
    self.assertEqual(msg.get_filename(), 'foo\\wacky"name')
2485
def test_get_body_encoding_with_bogus_charset(self):
    """An unrecognised charset name reports 'base64' as body encoding."""
    body_encoding = Charset('not a charset').get_body_encoding()
    self.assertEqual(body_encoding, 'base64')
2489
def test_get_body_encoding_with_uppercase_charset(self):
    """Charset names taken from Content-Type are handled case-blind."""
    check = self.assertEqual
    # The header keeps the upper-case spelling, while get_charsets()
    # reports the name in lower case.
    msg = Message()
    msg['Content-Type'] = 'text/plain; charset=UTF-8'
    check(msg['content-type'], 'text/plain; charset=UTF-8')
    found = msg.get_charsets()
    check(len(found), 1)
    check(found[0], 'utf-8')
    utf8 = Charset(found[0])
    check(utf8.get_body_encoding(), 'base64')
    msg.set_payload(b'hello world', charset=utf8)
    check(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
    check(msg.get_payload(decode=True), b'hello world')
    check(msg['content-transfer-encoding'], 'base64')
    # Try another one
    msg = Message()
    msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
    found = msg.get_charsets()
    check(len(found), 1)
    check(found[0], 'us-ascii')
    ascii_charset = Charset(found[0])
    check(ascii_charset.get_body_encoding(), encoders.encode_7or8bit)
    msg.set_payload('hello world', charset=ascii_charset)
    check(msg.get_payload(), 'hello world')
    check(msg['content-transfer-encoding'], '7bit')
2515
def test_charsets_case_insensitive(self):
    """Lower- and upper-case charset names give the same body encoding."""
    lower, upper = Charset('us-ascii'), Charset('US-ASCII')
    self.assertEqual(lower.get_body_encoding(), upper.get_body_encoding())
2520
2521 def test_partial_falls_inside_message_delivery_status(self):
2522 eq = self.ndiffAssertEqual
2523 # The Parser interface provides chunks of data to FeedParser in 8192
2524 # byte gulps. SF bug #1076485 found one of those chunks inside
2525 # message/delivery-status header block, which triggered an
2526 # unreadline() of NeedMoreData.
 # The test parses msg_43.txt, writes its structure to a StringIO with
 # iterators._structure() and compares that text with the listing below.
 # NOTE(review): the listing is presumably an indented tree, but the
 # nesting indentation is not visible in this copy -- restore it from the
 # upstream file before running; do not reformat the literal by hand.
2527 msg = self._msgobj('msg_43.txt')
2528 sfp = StringIO()
2529 iterators._structure(msg, sfp)
2530 eq(sfp.getvalue(), """\
2531multipart/report
2532 text/plain
2533 message/delivery-status
2534 text/plain
2535 text/plain
2536 text/plain
2537 text/plain
2538 text/plain
2539 text/plain
2540 text/plain
2541 text/plain
2542 text/plain
2543 text/plain
2544 text/plain
2545 text/plain
2546 text/plain
2547 text/plain
2548 text/plain
2549 text/plain
2550 text/plain
2551 text/plain
2552 text/plain
2553 text/plain
2554 text/plain
2555 text/plain
2556 text/plain
2557 text/plain
2558 text/plain
2559 text/plain
2560 text/rfc822-headers
2561""")
2562
def test_make_msgid_domain(self):
    """make_msgid(domain=...) ends the id with '@<domain>>'."""
    msgid = email.utils.make_msgid(domain='testdomain-string')
    # The slice length is the length of the expected 19-character tail.
    tail = msgid[-19:]
    self.assertEqual(tail, '@testdomain-string>')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002567
Ezio Melottib3aedd42010-11-20 19:04:17 +00002568
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002569# Test the iterator/generators
class TestIterators(TestEmailBase):
    """Tests for email.iterators and for the feed parser's line buffer."""

    def test_body_line_iterator(self):
        """body_line_iterator() yields every body line of a message."""
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # First a simple non-multipart message
        msg = self._msgobj('msg_01.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 6)
        neq(EMPTYSTRING.join(lines), msg.get_payload())
        # Now a more complicated multipart
        msg = self._msgobj('msg_02.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 43)
        with openfile('msg_19.txt') as fp:
            neq(EMPTYSTRING.join(lines), fp.read())

    def test_typed_subpart_iterator(self):
        """Selecting by main type 'text' finds both text subparts."""
        eq = self.assertEqual
        msg = self._msgobj('msg_04.txt')
        payloads = [subpart.get_payload() for subpart in
                    iterators.typed_subpart_iterator(msg, 'text')]
        eq(len(payloads), 2)
        eq(EMPTYSTRING.join(payloads), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        """Selecting text/plain on msg_03.txt yields its single part."""
        eq = self.assertEqual
        msg = self._msgobj('msg_03.txt')
        payloads = [subpart.get_payload() for subpart in
                    iterators.typed_subpart_iterator(msg, 'text', 'plain')]
        eq(len(payloads), 1)
        eq(EMPTYSTRING.join(payloads), """\

Hi,

Do you like this message?

-Me
""")

    def test_pushCR_LF(self):
        """Check BufferedSubFile with line endings split across pushes.

        FeedParser BufferedSubFile.push() assumed it received complete
        line endings.  A CR ending one push() followed by a LF starting
        the next push() added an empty line.
        """
        from email.feedparser import BufferedSubFile, NeedMoreData
        # Each entry is (chunk to push, number of lines that become
        # readable immediately after that push).
        chunks = [
            ("a\r \n", 2),
            ("b", 0),
            ("c\n", 1),
            ("", 0),
            ("d\r\n", 1),
            ("e\r", 0),
            ("\nf", 1),
            ("\r\n", 1),
        ]
        bsf = BufferedSubFile()
        lines_read = []
        expected_total = 0
        for chunk, expected_now in chunks:
            bsf.push(chunk)
            expected_total += expected_now
            read_now = 0
            while True:
                line = bsf.readline()
                # NeedMoreData is a sentinel object, so test identity.
                if line is NeedMoreData:
                    break
                lines_read.append(line)
                read_now += 1
            # assertEqual reports both values when the counts differ.
            self.assertEqual(expected_now, read_now)
        self.assertEqual(len(lines_read), expected_total)
        # Nothing may be lost or added: output joins back to the input.
        self.assertEqual(''.join(chunk for chunk, _ in chunks),
                         ''.join(lines_read))
2656
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002657
Ezio Melottib3aedd42010-11-20 19:04:17 +00002658
class TestParsers(TestEmailBase):
    """Tests for Parser/HeaderParser on well-formed and odd input."""

    def test_header_parser(self):
        """HeaderParser reads the headers and leaves the body a string."""
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt') as fp:
            msg = HeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        self.assertFalse(msg.is_multipart())
        # assertIsInstance reports the actual type on failure.
        self.assertIsInstance(msg.get_payload(), str)

    def test_whitespace_continuation(self):
        """A whitespace-only line continues the preceding header."""
        eq = self.assertEqual
        # This message contains a line after the Subject: header that has
        # only whitespace, but it is not empty!
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: the next line has a space on it
\x20
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        """Same as above with the continued header in last position."""
        eq = self.assertEqual
        # Like the previous test, but the subject line is the last
        # header.
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam
Subject: the next line has a space on it
\x20

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        """A CRLF-delimited multipart is split into its two parts."""
        eq = self.assertEqual
        with openfile('msg_26.txt', newline='\n') as fp:
            msg = Parser().parse(fp)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'text/plain')
        eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'application/riscos')

    def test_crlf_flatten(self):
        """Flattening with linesep='\\r\\n' reproduces the CRLF input."""
        # Using newline='\n' preserves the crlfs in this input file.
        with openfile('msg_26.txt', newline='\n') as fp:
            text = fp.read()
        msg = email.message_from_string(text)
        s = StringIO()
        Generator(s).flatten(msg, linesep='\r\n')
        self.assertEqual(s.getvalue(), text)

    # NOTE(review): placed at class level on the assumption that it is
    # meant as the unittest maxDiff setting; the original indentation is
    # not visible in the copy this was taken from -- confirm.
    maxDiff = None

    def test_multipart_digest_with_extra_mime_headers(self):
        """Each digest entry is a message/rfc822 wrapping text/plain."""
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        eq(msg.is_multipart(), 1)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'message/rfc822')
        eq(part1.is_multipart(), 1)
        eq(len(part1.get_payload()), 1)
        part1a = part1.get_payload(0)
        eq(part1a.is_multipart(), 0)
        eq(part1a.get_content_type(), 'text/plain')
        neq(part1a.get_payload(), 'message 1\n')
        # next message/rfc822
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'message/rfc822')
        eq(part2.is_multipart(), 1)
        eq(len(part2.get_payload()), 1)
        part2a = part2.get_payload(0)
        eq(part2a.is_multipart(), 0)
        eq(part2a.get_content_type(), 'text/plain')
        neq(part2a.get_payload(), 'message 2\n')

    def test_three_lines(self):
        """The last header is found when no blank line or body follows."""
        # A bug report by Andrew McNamara
        lines = ['From: Andrew Person <aperson@dom.ain',
                 'Subject: Test',
                 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
        msg = email.message_from_string(NL.join(lines))
        self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        """CRLF line ends do not leak into parsed header values."""
        eq = self.assertEqual
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        value1 = 'text'
        value2 = 'more text'
        m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            value1, value2)
        msg = email.message_from_string(m)
        eq(msg.get('Header'), value1)
        eq(msg.get('Next-Header'), value2)

    def test_rfc2822_header_syntax(self):
        """Unusual punctuation is accepted in header field names."""
        eq = self.assertEqual
        m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg), 3)
        eq(sorted(field for field in msg), ['!"#QUX;~', '>From', 'From'])
        eq(msg.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        """A first line with a space in the 'name' ends header parsing."""
        eq = self.assertEqual
        m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg.keys()), 0)

    def test_rfc2822_one_character_header(self):
        """Single-character header field names are accepted."""
        eq = self.assertEqual
        m = 'A: first header\nB: second header\nCC: third header\n\nbody'
        msg = email.message_from_string(m)
        # sorted() works whatever kind of iterable keys() returns.
        headers = sorted(msg.keys())
        eq(headers, ['A', 'B', 'CC'])
        eq(msg.get_payload(), 'body')

    def test_CRLFLF_at_end_of_part(self):
        """A part body ending in CRLF keeps it before the boundary."""
        # issue 5610: feedparser should not eat two chars from body part
        # ending with "\r\n\n".
        m = (
            "From: foo@bar.com\n"
            "To: baz\n"
            "Mime-Version: 1.0\n"
            "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
            "\n"
            "--BOUNDARY\n"
            "Content-Type: text/plain\n"
            "\n"
            "body ending with CRLF newline\r\n"
            "\n"
            "--BOUNDARY--\n"
        )
        msg = email.message_from_string(m)
        self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002821
Ezio Melottib3aedd42010-11-20 19:04:17 +00002822
class Test8BitBytesHandling(unittest.TestCase):
    # In Python3 all input is string, but that doesn't work if the actual
    # input uses an 8bit transfer encoding.  To hack around that, in
    # email 5.1 we decode byte streams using the surrogateescape error
    # handler, and reconvert to binary at appropriate places if we detect
    # surrogates.  This doesn't allow us to transform headers with 8bit
    # bytes (they get munged), but it does allow us to parse and preserve
    # them, and to decode body parts that use an 8bit CTE.

    # Template for the body tests; filled in by _parse_body().
    bodytest_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: baz
        Mime-Version: 1.0
        Content-Type: text/plain; charset={charset}
        Content-Transfer-Encoding: {cte}

        {bodyline}
        """)

    def _parse_body(self, charset, cte, bodyline):
        """Fill in bodytest_msg, encode it as UTF-8 and parse the bytes."""
        text = self.bodytest_msg.format(charset=charset,
                                        cte=cte,
                                        bodyline=bodyline)
        return email.message_from_bytes(text.encode('utf-8'))

    def test_known_8bit_CTE(self):
        msg = self._parse_body('utf-8', '8bit', 'pöstal')
        self.assertEqual(msg.get_payload(), "pöstal\n")
        self.assertEqual(msg.get_payload(decode=True),
                         "pöstal\n".encode('utf-8'))

    def test_unknown_8bit_CTE(self):
        msg = self._parse_body('notavalidcharset', '8bit', 'pöstal')
        self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n")
        self.assertEqual(msg.get_payload(decode=True),
                         "pöstal\n".encode('utf-8'))

    def test_8bit_in_quopri_body(self):
        # This is non-RFC compliant data...without 'decode' the library
        # code decodes the body using the charset from the headers, and
        # because the source byte really is utf-8 this works.  This is
        # likely to fail against real dirty data (ie: produce mojibake),
        # but the data is invalid anyway so it is as good a guess as any.
        # But this means that this test just confirms the current
        # behavior; that behavior is not necessarily the best possible
        # behavior.  With 'decode' it is returning the raw bytes, so that
        # test should be of correct behavior, or at least produce the
        # same result that email4 did.
        msg = self._parse_body('utf-8', 'quoted-printable', 'p=C3=B6stál')
        self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
        self.assertEqual(msg.get_payload(decode=True),
                         'pöstál\n'.encode('utf-8'))

    def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
        # This is similar to the previous test, but proves that if the
        # 8bit byte is undecodeable in the specified charset, it gets
        # replaced by the unicode 'unknown' character.  Again, this may
        # or may not be the ideal behavior.  Note that if decode=False
        # none of the decoders will get involved, so this is the only
        # test we need for this behavior.
        msg = self._parse_body('ascii', 'quoted-printable', 'p=C3=B6stál')
        self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
        self.assertEqual(msg.get_payload(decode=True),
                         'pöstál\n'.encode('utf-8'))

    def test_8bit_in_base64_body(self):
        # Sticking an 8bit byte in a base64 block makes it undecodable by
        # normal means, so the block is returned undecoded, but as bytes.
        msg = self._parse_body('utf-8', 'base64', 'cMO2c3RhbAá=')
        self.assertEqual(msg.get_payload(decode=True),
                         'cMO2c3RhbAá=\n'.encode('utf-8'))

    def test_8bit_in_uuencode_body(self):
        # Sticking an 8bit byte in a uuencode block makes it undecodable
        # by normal means, so the block is returned undecoded, but as
        # bytes.
        msg = self._parse_body('utf-8', 'uuencode', '<,.V<W1A; á ')
        self.assertEqual(msg.get_payload(decode=True),
                         '<,.V<W1A; á \n'.encode('utf-8'))

    # Pairs of (raw header line, (field name, expected value when the
    # message is printed)).
    headertest_headers = (
        ('From: foo@bar.com', ('From', 'foo@bar.com')),
        ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
        ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
            '\tJean de Baddie',
            ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
                'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
                ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
        ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
        )
    headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) +
        '\nYes, they are flying.\n').encode('utf-8')

    def test_get_8bit_header(self):
        msg = email.message_from_bytes(self.headertest_msg)
        for value in (msg.get('to'), msg['to']):
            self.assertEqual(str(value), 'b\uFFFD\uFFFDz')

    def test_print_8bit_headers(self):
        msg = email.message_from_bytes(self.headertest_msg)
        printed_values = [expected[1] for (_, expected) in
                          self.headertest_headers]
        template = textwrap.dedent("""\
            From: {}
            To: {}
            Subject: {}
            From: {}

            Yes, they are flying.
            """)
        self.assertEqual(str(msg), template.format(*printed_values))

    def test_values_with_8bit_headers(self):
        msg = email.message_from_bytes(self.headertest_msg)
        values = [str(x) for x in msg.values()]
        self.assertListEqual(values,
                             ['foo@bar.com',
                              'b\uFFFD\uFFFDz',
                              'Maintenant je vous pr\uFFFD\uFFFDsente mon '
                                  'coll\uFFFD\uFFFDgue, le pouf '
                                  'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
                                  '\tJean de Baddie',
                              "g\uFFFD\uFFFDst"])

    def test_items_with_8bit_headers(self):
        msg = email.message_from_bytes(self.headertest_msg)
        items = [(str(x), str(y)) for (x, y) in msg.items()]
        self.assertListEqual(items,
                             [('From', 'foo@bar.com'),
                              ('To', 'b\uFFFD\uFFFDz'),
                              ('Subject', 'Maintenant je vous '
                                          'pr\uFFFD\uFFFDsente '
                                          'mon coll\uFFFD\uFFFDgue, le pouf '
                                          'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
                                          '\tJean de Baddie'),
                              ('From', 'g\uFFFD\uFFFDst')])

    def test_get_all_with_8bit_headers(self):
        msg = email.message_from_bytes(self.headertest_msg)
        senders = [str(x) for x in msg.get_all('from')]
        self.assertListEqual(senders,
                             ['foo@bar.com',
                              'g\uFFFD\uFFFDst'])

    non_latin_bin_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: báz
        Subject: Maintenant je vous présente mon collègue, le pouf célèbre
        \tJean de Baddie
        Mime-Version: 1.0
        Content-Type: text/plain; charset="utf-8"
        Content-Transfer-Encoding: 8bit

        Да, они летят.
        """).encode('utf-8')

    def test_bytes_generator(self):
        msg = email.message_from_bytes(self.non_latin_bin_msg)
        out = BytesIO()
        email.generator.BytesGenerator(out).flatten(msg)
        self.assertEqual(out.getvalue(), self.non_latin_bin_msg)

    # NOTE(review): the two folded Subject continuation lines below are
    # written with one extra leading space, matching the folded form in
    # headertest_headers; the copy this was taken from does not show
    # leading whitespace, so confirm against the upstream file.
    non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\
        From: foo@bar.com
        To: =?unknown-8bit?q?b=C3=A1z?=
        Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=
         =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=
         =?unknown-8bit?q?_Jean_de_Baddie?=
        Mime-Version: 1.0
        Content-Type: text/plain; charset="utf-8"
        Content-Transfer-Encoding: base64

        0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
        """)

    def test_generator_handles_8bit(self):
        msg = email.message_from_bytes(self.non_latin_bin_msg)
        out = StringIO()
        email.generator.Generator(out).flatten(msg)
        self.assertEqual(out.getvalue(),
                         self.non_latin_bin_msg_as7bit_wrapped)

    def test_bytes_generator_with_unix_from(self):
        # The unixfrom contains a current date, so we can't check it
        # literally.  Just make sure the first word is 'From' and the
        # rest of the message matches the input.
        msg = email.message_from_bytes(self.non_latin_bin_msg)
        out = BytesIO()
        email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
        envelope, *rest = out.getvalue().split(b'\n')
        self.assertEqual(envelope.split()[0], b'From')
        self.assertEqual(b'\n'.join(rest), self.non_latin_bin_msg)

    # Same as the wrapped form, except that the first two Subject lines
    # are merged into a single encoded word on one line.
    non_latin_bin_msg_as7bit = non_latin_bin_msg_as7bit_wrapped.split('\n')
    non_latin_bin_msg_as7bit[2:4] = [
        'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
        'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
    non_latin_bin_msg_as7bit = '\n'.join(non_latin_bin_msg_as7bit)

    def test_message_from_binary_file(self):
        fn = 'test.msg'
        self.addCleanup(unlink, fn)
        with open(fn, 'wb') as outfile:
            outfile.write(self.non_latin_bin_msg)
        with open(fn, 'rb') as infile:
            parsed = email.parser.BytesParser().parse(infile)
        self.assertEqual(str(parsed), self.non_latin_bin_msg_as7bit)

    latin_bin_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: Dinsdale
        Subject: Nudge nudge, wink, wink
        Mime-Version: 1.0
        Content-Type: text/plain; charset="latin-1"
        Content-Transfer-Encoding: 8bit

        oh là là, know what I mean, know what I mean?
        """).encode('latin-1')

    latin_bin_msg_as7bit = textwrap.dedent("""\
        From: foo@bar.com
        To: Dinsdale
        Subject: Nudge nudge, wink, wink
        Mime-Version: 1.0
        Content-Type: text/plain; charset="iso-8859-1"
        Content-Transfer-Encoding: quoted-printable

        oh l=E0 l=E0, know what I mean, know what I mean?
        """)

    def test_string_generator_reencodes_to_quopri_when_appropriate(self):
        parsed = email.message_from_bytes(self.latin_bin_msg)
        self.assertEqual(str(parsed), self.latin_bin_msg_as7bit)

    def test_decoded_generator_emits_unicode_body(self):
        parsed = email.message_from_bytes(self.latin_bin_msg)
        out = StringIO()
        email.generator.DecodedGenerator(out).flatten(parsed)
        # DecodedHeader output contains an extra blank line compared
        # to the input message.  RDM: not sure if this is a bug or not,
        # but it is not specific to the 8bit->7bit conversion.
        expected = self.latin_bin_msg.decode('latin-1') + '\n'
        self.assertEqual(out.getvalue(), expected)

    def test_bytes_feedparser(self):
        bfp = email.feedparser.BytesFeedParser()
        # Feed the message in ten-byte slices.
        data = self.latin_bin_msg
        for start in range(0, len(data), 10):
            bfp.feed(data[start:start + 10])
        parsed = bfp.close()
        self.assertEqual(str(parsed), self.latin_bin_msg_as7bit)

    def test_crlf_flatten(self):
        with openfile('msg_26.txt', 'rb') as fp:
            text = fp.read()
        msg = email.message_from_bytes(text)
        out = BytesIO()
        email.generator.BytesGenerator(out).flatten(msg, linesep='\r\n')
        self.assertEqual(out.getvalue(), text)

    # NOTE(review): assumed to be the class-level unittest maxDiff
    # setting; the original indentation is not visible -- confirm.
    maxDiff = None
3089
Ezio Melottib3aedd42010-11-20 19:04:17 +00003090
class BaseTestBytesGeneratorIdempotent:
    """Helpers for round-tripping a message through BytesGenerator.

    The methods read ``linesep``, ``blinesep`` and
    ``normalize_linesep_regex`` from ``self`` and call ``assertListEqual``
    on ``self``; this class defines none of them.
    """

    maxDiff = None

    def _msgobj(self, filename):
        """Return (message, data) for a data file read as bytes.

        ``data`` is the file content after every match of
        ``self.normalize_linesep_regex`` is replaced by ``self.blinesep``;
        the message is parsed from that same data.
        """
        with openfile(filename, 'rb') as fp:
            raw = fp.read()
        normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(normalized), normalized

    def _idempotent(self, msg, data, unixfrom=False):
        """Flatten msg with self.linesep and compare the bytes to data."""
        buf = BytesIO()
        generator = email.generator.BytesGenerator(buf, maxheaderlen=0)
        generator.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertByteStringsEqual(data, buf.getvalue())

    def assertByteStringsEqual(self, str1, str2):
        """Compare two byte strings as lists of lines split on b'\\n'."""
        # Not using self.blinesep here is intentional.  This way the output
        # is more useful when the failure results in mixed line endings.
        first_lines = str1.split(b'\n')
        second_lines = str2.split(b'\n')
        self.assertListEqual(first_lines, second_lines)
3112
3113
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                     TestIdempotent):
    """Run the idempotency tests with bare LF line endings."""

    # Every CRLF in the input data is replaced by a bare LF.
    normalize_linesep_regex = re.compile(br'\r\n')
    blinesep = b'\n'
    linesep = '\n'
3119
3120
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    """Run the idempotency tests with CRLF line endings."""

    # Only an LF that is not already preceded by CR is replaced, so
    # existing CRLF pairs are left alone.
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
    blinesep = b'\r\n'
    linesep = '\r\n'
3126
Ezio Melottib3aedd42010-11-20 19:04:17 +00003127
class TestBase64(unittest.TestCase):
    """Tests for the email.base64mime helper functions."""

    def test_len(self):
        """header_length() gives the size of the base64-encoded form."""
        eq = self.assertEqual
        eq(base64mime.header_length('hello'),
           len(base64mime.body_encode(b'hello', eol='')))
        for size in range(15):
            # Four output characters for every started group of three
            # input characters: 0 -> 0, 1..3 -> 4, 4..6 -> 8, and so on.
            bsize = 4 * ((size + 2) // 3)
            eq(base64mime.header_length('x' * size), bsize)

    def test_decode(self):
        """decode() returns bytes, also for the empty string."""
        eq = self.assertEqual
        eq(base64mime.decode(''), b'')
        eq(base64mime.decode('aGVsbG8='), b'hello')

    def test_encode(self):
        """body_encode() honours the maxlinelen and eol arguments."""
        eq = self.assertEqual
        encode = base64mime.body_encode
        eq(encode(b''), b'')
        eq(encode(b'hello'), 'aGVsbG8=\n')
        # Test the binary flag
        eq(encode(b'hello\n'), 'aGVsbG8K\n')
        # The same wrapped output is expected twice below, only with a
        # different line terminator: three full lines and a short one.
        full_line = 'eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg'
        wrapped = [full_line, full_line, full_line, 'eHh4eCB4eHh4IA==']
        # Test the maxlinelen arg
        eq(encode(b'xxxx ' * 20, maxlinelen=40),
           ''.join(line + '\n' for line in wrapped))
        # Test the eol argument
        eq(encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           ''.join(line + '\r\n' for line in wrapped))

    def test_header_encode(self):
        """header_encode() wraps the text in a base64 encoded word."""
        eq = self.assertEqual
        he = base64mime.header_encode
        eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        eq(he('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # Test the charset option
        eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=')
        eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003178
3179
Ezio Melottib3aedd42010-11-20 19:04:17 +00003180
class TestQuopri(unittest.TestCase):
    # Covers the email.quoprimime helpers for headers and bodies.

    def setUp(self):
        # Set of characters (as byte integers) that don't need to be encoded
        # in headers.
        self.hlit = list(chain(
            range(ord('a'), ord('z') + 1),
            range(ord('A'), ord('Z') + 1),
            range(ord('0'), ord('9') + 1),
            b'!*+-/'))
        # Set of characters (as byte integers) that do need to be encoded in
        # headers.
        self.hnon = [c for c in range(256) if c not in self.hlit]
        # Sanity check on the fixture itself.  A real assertion method is
        # used so the check is not stripped when running under -O.
        self.assertEqual(len(self.hlit) + len(self.hnon), 256)
        # Set of characters (as byte integers) that don't need to be encoded
        # in bodies.
        self.blit = list(range(ord(' '), ord('~') + 1))
        self.blit.append(ord('\t'))
        self.blit.remove(ord('='))
        # Set of characters (as byte integers) that do need to be encoded in
        # bodies.
        self.bnon = [c for c in range(256) if c not in self.blit]
        self.assertEqual(len(self.blit) + len(self.bnon), 256)

    def test_quopri_header_check(self):
        for c in self.hlit:
            self.assertFalse(quoprimime.header_check(c),
                        'Should not be header quopri encoded: %s' % chr(c))
        for c in self.hnon:
            self.assertTrue(quoprimime.header_check(c),
                            'Should be header quopri encoded: %s' % chr(c))

    def test_quopri_body_check(self):
        for c in self.blit:
            self.assertFalse(quoprimime.body_check(c),
                        'Should not be body quopri encoded: %s' % chr(c))
        for c in self.bnon:
            self.assertTrue(quoprimime.body_check(c),
                            'Should be body quopri encoded: %s' % chr(c))

    def test_header_quopri_len(self):
        eq = self.assertEqual
        eq(quoprimime.header_length(b'hello'), 5)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
           quoprimime.header_length(b'hello') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
           quoprimime.header_length(b'h@e@l@l@o@') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        for c in self.hlit:
            eq(quoprimime.header_length(bytes([c])), 1,
               'expected length 1 for %r' % chr(c))
        for c in self.hnon:
            # Space is special; it's encoded to _
            if c == ord(' '):
                continue
            eq(quoprimime.header_length(bytes([c])), 3,
               'expected length 3 for %r' % chr(c))
        eq(quoprimime.header_length(b' '), 1)

    def test_body_quopri_len(self):
        eq = self.assertEqual
        # Literal bytes count as 1 character, encoded bytes as 3 (=XX).
        for c in self.blit:
            eq(quoprimime.body_length(bytes([c])), 1)
        for c in self.bnon:
            eq(quoprimime.body_length(bytes([c])), 3)

    def test_quote_unquote_idempotent(self):
        # unquote() must undo quote() for every code point below 256.
        for x in range(256):
            c = chr(x)
            self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)

    def test_header_encode(self):
        eq = self.assertEqual
        he = quoprimime.header_encode
        eq(he(b'hello'), '=?iso-8859-1?q?hello?=')
        eq(he(b'hello', charset='iso-8859-2'), '=?iso-8859-2?q?hello?=')
        eq(he(b'hello\nworld'), '=?iso-8859-1?q?hello=0Aworld?=')
        # Test a non-ASCII character
        eq(he(b'hello\xc7there'), '=?iso-8859-1?q?hello=C7there?=')

    def test_decode(self):
        eq = self.assertEqual
        eq(quoprimime.decode(''), '')
        eq(quoprimime.decode('hello'), 'hello')
        # The second argument replaces line breaks in the decoded text.
        eq(quoprimime.decode('hello', 'X'), 'hello')
        eq(quoprimime.decode('hello\nworld', 'X'), 'helloXworld')

    def test_encode(self):
        eq = self.assertEqual
        eq(quoprimime.body_encode(''), '')
        eq(quoprimime.body_encode('hello'), 'hello')
        # A CRLF in the input comes out as the default eol
        eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
        # Test the maxlinelen arg
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
x xxxx xxxx xxxx xxxx=20""")
        # Test the eol argument
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
x xxxx xxxx xxxx xxxx=20""")
        # Blank lines inside the body are preserved.
        eq(quoprimime.body_encode("""\
one line

two line"""), """\
one line

two line""")
3297
3298
Ezio Melottib3aedd42010-11-20 19:04:17 +00003299
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003300# Test the Charset class
3301class TestCharset(unittest.TestCase):
3302 def tearDown(self):
3303 from email import charset as CharsetModule
3304 try:
3305 del CharsetModule.CHARSETS['fake']
3306 except KeyError:
3307 pass
3308
Guido van Rossum9604e662007-08-30 03:46:43 +00003309 def test_codec_encodeable(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003310 eq = self.assertEqual
3311 # Make sure us-ascii = no Unicode conversion
3312 c = Charset('us-ascii')
Guido van Rossum9604e662007-08-30 03:46:43 +00003313 eq(c.header_encode('Hello World!'), 'Hello World!')
3314 # Test 8-bit idempotency with us-ascii
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003315 s = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
Guido van Rossum9604e662007-08-30 03:46:43 +00003316 self.assertRaises(UnicodeError, c.header_encode, s)
3317 c = Charset('utf-8')
3318 eq(c.header_encode(s), '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003319
3320 def test_body_encode(self):
3321 eq = self.assertEqual
3322 # Try a charset with QP body encoding
3323 c = Charset('iso-8859-1')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00003324 eq('hello w=F6rld', c.body_encode('hello w\xf6rld'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003325 # Try a charset with Base64 body encoding
3326 c = Charset('utf-8')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00003327 eq('aGVsbG8gd29ybGQ=\n', c.body_encode(b'hello world'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003328 # Try a charset with None body encoding
3329 c = Charset('us-ascii')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00003330 eq('hello world', c.body_encode('hello world'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003331 # Try the convert argument, where input codec != output codec
3332 c = Charset('euc-jp')
3333 # With apologies to Tokio Kikuchi ;)
Barry Warsawbef9d212007-08-31 10:55:37 +00003334 # XXX FIXME
3335## try:
3336## eq('\x1b$B5FCO;~IW\x1b(B',
3337## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
3338## eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
3339## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
3340## except LookupError:
3341## # We probably don't have the Japanese codecs installed
3342## pass
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003343 # Testing SF bug #625509, which we have to fake, since there are no
3344 # built-in encodings where the header encoding is QP but the body
3345 # encoding is not.
3346 from email import charset as CharsetModule
3347 CharsetModule.add_charset('fake', CharsetModule.QP, None)
3348 c = Charset('fake')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00003349 eq('hello w\xf6rld', c.body_encode('hello w\xf6rld'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003350
3351 def test_unicode_charset_name(self):
3352 charset = Charset('us-ascii')
3353 self.assertEqual(str(charset), 'us-ascii')
3354 self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
3355
3356
Ezio Melottib3aedd42010-11-20 19:04:17 +00003357
# Test multilingual MIME headers.
class TestHeader(TestEmailBase):
    def test_simple(self):
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        h.append(' Goodbye World!')
        eq(h.encode(), 'Hello World! Goodbye World!')

    def test_simple_surprise(self):
        # Same expected output as test_simple even though the appended
        # chunk has no leading space.
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        h.append('Goodbye World!')
        eq(h.encode(), 'Hello World! Goodbye World!')

    def test_header_needs_no_decoding(self):
        h = 'no decoding needed'
        self.assertEqual(decode_header(h), [(h, None)])

    def test_long(self):
        h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
                   maxlinelen=76)
        # Every folded line must respect the requested maximum; on failure
        # assertLessEqual reports the offending length.
        for line in h.encode(splitchars=' ').split('\n '):
            self.assertLessEqual(len(line), 76)

    def test_multilingual(self):
        eq = self.ndiffAssertEqual
        g = Charset("iso-8859-1")
        cz = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        g_head = (b'Die Mieter treten hier ein werden mit einem '
                  b'Foerderband komfortabel den Korridor entlang, '
                  b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
                  b'gegen die rotierenden Klingen bef\xf6rdert. ')
        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        # One header built from three chunks, each with its own charset.
        h = Header(g_head, g)
        h.append(cz_head, cz)
        h.append(utf8_head, utf8)
        enc = h.encode(maxlinelen=76)
        eq(enc, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
        # Decoding must give back the three original chunks.
        decoded = decode_header(enc)
        eq(len(decoded), 3)
        eq(decoded[0], (g_head, 'iso-8859-1'))
        eq(decoded[1], (cz_head, 'iso-8859-2'))
        eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
        ustr = str(h)
        eq(ustr,
           (b'Die Mieter treten hier ein werden mit einem Foerderband '
            b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
            b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
            b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
            b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
            b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
            b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
            b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
            b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
            b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
            b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
            b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
            b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
            b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
            b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
            b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
            ).decode('utf-8'))
        # Test make_header()
        newh = make_header(decode_header(enc))
        eq(newh, h)

    def test_empty_header_encode(self):
        h = Header()
        self.assertEqual(h.encode(), '')

    def test_header_ctor_default_args(self):
        eq = self.ndiffAssertEqual
        h = Header()
        eq(h, '')
        h.append('foo', Charset('iso-8859-1'))
        eq(h, 'foo')

    def test_explicit_maxlinelen(self):
        eq = self.ndiffAssertEqual
        hstr = ('A very long line that must get split to something other '
                'than at the 76th character boundary to test the non-default '
                'behavior')
        # No header_name given.
        h = Header(hstr)
        eq(h.encode(), '''\
A very long line that must get split to something other than at the 76th
 character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        # With header_name='Subject' the expected fold point moves earlier.
        h = Header(hstr, header_name='Subject')
        eq(h.encode(), '''\
A very long line that must get split to something other than at the
 76th character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        # A maxlinelen larger than the text: no folding expected.
        h = Header(hstr, maxlinelen=1024, header_name='Subject')
        eq(h.encode(), hstr)
        eq(str(h), hstr)

    def test_quopri_splittable(self):
        eq = self.ndiffAssertEqual
        h = Header(charset='iso-8859-1', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_?=""")
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='iso-8859-1', maxlinelen=40)
        # Reuse x so the round-trip check below compares against exactly
        # the text that was appended.
        h.append(x)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
 =?iso-8859-1?q?_xxxx_xxxx_?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_base64_splittable(self):
        eq = self.ndiffAssertEqual
        h = Header(charset='koi8-r', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IA==?=""")
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='koi8-r', maxlinelen=40)
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_us_ascii_header(self):
        eq = self.assertEqual
        s = 'hello'
        x = decode_header(s)
        eq(x, [('hello', None)])
        h = make_header(x)
        eq(s, h.encode())

    def test_string_charset(self):
        # The charset may be given by name instead of as a Charset object.
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        eq(h, 'hello')

##    def test_unicode_error(self):
##        raises = self.assertRaises
##        raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
##        h = Header()
##        raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')

    def test_utf8_shortest(self):
        eq = self.assertEqual
        # The same utf-8 charset yields a q-encoded word for the first
        # input and a b-encoded word for the second.
        h = Header('p\xf6stal', 'utf-8')
        eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
        h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
        eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')

    def test_bad_8bit_header(self):
        raises = self.assertRaises
        eq = self.assertEqual
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        # Undecodable bytes are rejected by default...
        raises(UnicodeError, Header, x)
        h = Header()
        raises(UnicodeError, h.append, x)
        # ...but accepted with errors='replace', matching a utf-8 decode
        # performed with the same error handler.
        e = x.decode('utf-8', 'replace')
        eq(str(Header(x, errors='replace')), e)
        h.append(x, errors='replace')
        eq(str(h), e)

    def test_encoded_adjacent_nonencoded(self):
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        h.append('world')
        s = h.encode()
        eq(s, '=?iso-8859-1?q?hello?= world')
        # Decode and re-encode: the result must be unchanged.
        h = make_header(decode_header(s))
        eq(h.encode(), s)

    def test_whitespace_eater(self):
        eq = self.assertEqual
        s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
        parts = decode_header(s)
        # The two adjacent koi8-r encoded words come back as one chunk.
        eq(parts, [
            (b'Subject:', None),
            (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'),
            (b'zz.', None),
        ])
        hdr = make_header(parts)
        eq(hdr.encode(),
           'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')

    def test_broken_base64_header(self):
        raises = self.assertRaises
        s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
        raises(errors.HeaderParseError, decode_header, s)

    def test_shift_jis_charset(self):
        # A shift_jis input charset is expected to be emitted as iso-2022-jp.
        h = Header('文', charset='shift_jis')
        self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')
3669
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003670
Ezio Melottib3aedd42010-11-20 19:04:17 +00003671
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003672# Test RFC 2231 header parameters (en/de)coding
3673class TestRFC2231(TestEmailBase):
3674 def test_get_param(self):
3675 eq = self.assertEqual
3676 msg = self._msgobj('msg_29.txt')
3677 eq(msg.get_param('title'),
3678 ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
3679 eq(msg.get_param('title', unquote=False),
3680 ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))
3681
3682 def test_set_param(self):
3683 eq = self.ndiffAssertEqual
3684 msg = Message()
3685 msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
3686 charset='us-ascii')
3687 eq(msg.get_param('title'),
3688 ('us-ascii', '', 'This is even more ***fun*** isn\'t it!'))
3689 msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
3690 charset='us-ascii', language='en')
3691 eq(msg.get_param('title'),
3692 ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
3693 msg = self._msgobj('msg_01.txt')
3694 msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
3695 charset='us-ascii', language='en')
3696 eq(msg.as_string(maxheaderlen=78), """\
3697Return-Path: <bbb@zzz.org>
3698Delivered-To: bbb@zzz.org
3699Received: by mail.zzz.org (Postfix, from userid 889)
3700\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
3701MIME-Version: 1.0
3702Content-Transfer-Encoding: 7bit
3703Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
3704From: bbb@ddd.com (John X. Doe)
3705To: bbb@zzz.org
3706Subject: This is a test message
3707Date: Fri, 4 May 2001 14:05:44 -0400
3708Content-Type: text/plain; charset=us-ascii;
R. David Murraydfd7eb02010-12-24 22:36:49 +00003709 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003710
3711
3712Hi,
3713
3714Do you like this message?
3715
3716-Me
3717""")
3718
3719 def test_del_param(self):
3720 eq = self.ndiffAssertEqual
3721 msg = self._msgobj('msg_01.txt')
3722 msg.set_param('foo', 'bar', charset='us-ascii', language='en')
3723 msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
3724 charset='us-ascii', language='en')
3725 msg.del_param('foo', header='Content-Type')
3726 eq(msg.as_string(maxheaderlen=78), """\
3727Return-Path: <bbb@zzz.org>
3728Delivered-To: bbb@zzz.org
3729Received: by mail.zzz.org (Postfix, from userid 889)
3730\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
3731MIME-Version: 1.0
3732Content-Transfer-Encoding: 7bit
3733Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
3734From: bbb@ddd.com (John X. Doe)
3735To: bbb@zzz.org
3736Subject: This is a test message
3737Date: Fri, 4 May 2001 14:05:44 -0400
3738Content-Type: text/plain; charset="us-ascii";
R. David Murraydfd7eb02010-12-24 22:36:49 +00003739 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003740
3741
3742Hi,
3743
3744Do you like this message?
3745
3746-Me
3747""")
3748
3749 def test_rfc2231_get_content_charset(self):
3750 eq = self.assertEqual
3751 msg = self._msgobj('msg_32.txt')
3752 eq(msg.get_content_charset(), 'us-ascii')
3753
R. David Murraydfd7eb02010-12-24 22:36:49 +00003754 def test_rfc2231_parse_rfc_quoting(self):
3755 m = textwrap.dedent('''\
3756 Content-Disposition: inline;
3757 \tfilename*0*=''This%20is%20even%20more%20;
3758 \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
3759 \tfilename*2="is it not.pdf"
3760
3761 ''')
3762 msg = email.message_from_string(m)
3763 self.assertEqual(msg.get_filename(),
3764 'This is even more ***fun*** is it not.pdf')
3765 self.assertEqual(m, msg.as_string())
3766
3767 def test_rfc2231_parse_extra_quoting(self):
3768 m = textwrap.dedent('''\
3769 Content-Disposition: inline;
3770 \tfilename*0*="''This%20is%20even%20more%20";
3771 \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
3772 \tfilename*2="is it not.pdf"
3773
3774 ''')
3775 msg = email.message_from_string(m)
3776 self.assertEqual(msg.get_filename(),
3777 'This is even more ***fun*** is it not.pdf')
3778 self.assertEqual(m, msg.as_string())
3779
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003780 def test_rfc2231_no_language_or_charset(self):
3781 m = '''\
3782Content-Transfer-Encoding: 8bit
3783Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
3784Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm
3785
3786'''
3787 msg = email.message_from_string(m)
3788 param = msg.get_param('NAME')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003789 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003790 self.assertEqual(
3791 param,
3792 'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')
3793
3794 def test_rfc2231_no_language_or_charset_in_filename(self):
3795 m = '''\
3796Content-Disposition: inline;
3797\tfilename*0*="''This%20is%20even%20more%20";
3798\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
3799\tfilename*2="is it not.pdf"
3800
3801'''
3802 msg = email.message_from_string(m)
3803 self.assertEqual(msg.get_filename(),
3804 'This is even more ***fun*** is it not.pdf')
3805
3806 def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
3807 m = '''\
3808Content-Disposition: inline;
3809\tfilename*0*="''This%20is%20even%20more%20";
3810\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
3811\tfilename*2="is it not.pdf"
3812
3813'''
3814 msg = email.message_from_string(m)
3815 self.assertEqual(msg.get_filename(),
3816 'This is even more ***fun*** is it not.pdf')
3817
3818 def test_rfc2231_partly_encoded(self):
3819 m = '''\
3820Content-Disposition: inline;
3821\tfilename*0="''This%20is%20even%20more%20";
3822\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
3823\tfilename*2="is it not.pdf"
3824
3825'''
3826 msg = email.message_from_string(m)
3827 self.assertEqual(
3828 msg.get_filename(),
3829 'This%20is%20even%20more%20***fun*** is it not.pdf')
3830
3831 def test_rfc2231_partly_nonencoded(self):
3832 m = '''\
3833Content-Disposition: inline;
3834\tfilename*0="This%20is%20even%20more%20";
3835\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
3836\tfilename*2="is it not.pdf"
3837
3838'''
3839 msg = email.message_from_string(m)
3840 self.assertEqual(
3841 msg.get_filename(),
3842 'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')
3843
3844 def test_rfc2231_no_language_or_charset_in_boundary(self):
3845 m = '''\
3846Content-Type: multipart/alternative;
3847\tboundary*0*="''This%20is%20even%20more%20";
3848\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
3849\tboundary*2="is it not.pdf"
3850
3851'''
3852 msg = email.message_from_string(m)
3853 self.assertEqual(msg.get_boundary(),
3854 'This is even more ***fun*** is it not.pdf')
3855
3856 def test_rfc2231_no_language_or_charset_in_charset(self):
3857 # This is a nonsensical charset value, but tests the code anyway
3858 m = '''\
3859Content-Type: text/plain;
3860\tcharset*0*="This%20is%20even%20more%20";
3861\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
3862\tcharset*2="is it not.pdf"
3863
3864'''
3865 msg = email.message_from_string(m)
3866 self.assertEqual(msg.get_content_charset(),
3867 'this is even more ***fun*** is it not.pdf')
3868
3869 def test_rfc2231_bad_encoding_in_filename(self):
3870 m = '''\
3871Content-Disposition: inline;
3872\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
3873\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
3874\tfilename*2="is it not.pdf"
3875
3876'''
3877 msg = email.message_from_string(m)
3878 self.assertEqual(msg.get_filename(),
3879 'This is even more ***fun*** is it not.pdf')
3880
3881 def test_rfc2231_bad_encoding_in_charset(self):
3882 m = """\
3883Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D
3884
3885"""
3886 msg = email.message_from_string(m)
3887 # This should return None because non-ascii characters in the charset
3888 # are not allowed.
3889 self.assertEqual(msg.get_content_charset(), None)
3890
3891 def test_rfc2231_bad_character_in_charset(self):
3892 m = """\
3893Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D
3894
3895"""
3896 msg = email.message_from_string(m)
3897 # This should return None because non-ascii characters in the charset
3898 # are not allowed.
3899 self.assertEqual(msg.get_content_charset(), None)
3900
3901 def test_rfc2231_bad_character_in_filename(self):
3902 m = '''\
3903Content-Disposition: inline;
3904\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
3905\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
3906\tfilename*2*="is it not.pdf%E2"
3907
3908'''
3909 msg = email.message_from_string(m)
3910 self.assertEqual(msg.get_filename(),
3911 'This is even more ***fun*** is it not.pdf\ufffd')
3912
3913 def test_rfc2231_unknown_encoding(self):
3914 m = """\
3915Content-Transfer-Encoding: 8bit
3916Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt
3917
3918"""
3919 msg = email.message_from_string(m)
3920 self.assertEqual(msg.get_filename(), 'myfile.txt')
3921
3922 def test_rfc2231_single_tick_in_filename_extended(self):
3923 eq = self.assertEqual
3924 m = """\
3925Content-Type: application/x-foo;
3926\tname*0*=\"Frank's\"; name*1*=\" Document\"
3927
3928"""
3929 msg = email.message_from_string(m)
3930 charset, language, s = msg.get_param('name')
3931 eq(charset, None)
3932 eq(language, None)
3933 eq(s, "Frank's Document")
3934
3935 def test_rfc2231_single_tick_in_filename(self):
3936 m = """\
3937Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"
3938
3939"""
3940 msg = email.message_from_string(m)
3941 param = msg.get_param('name')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003942 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003943 self.assertEqual(param, "Frank's Document")
3944
3945 def test_rfc2231_tick_attack_extended(self):
3946 eq = self.assertEqual
3947 m = """\
3948Content-Type: application/x-foo;
3949\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"
3950
3951"""
3952 msg = email.message_from_string(m)
3953 charset, language, s = msg.get_param('name')
3954 eq(charset, 'us-ascii')
3955 eq(language, 'en-us')
3956 eq(s, "Frank's Document")
3957
3958 def test_rfc2231_tick_attack(self):
3959 m = """\
3960Content-Type: application/x-foo;
3961\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"
3962
3963"""
3964 msg = email.message_from_string(m)
3965 param = msg.get_param('name')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003966 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003967 self.assertEqual(param, "us-ascii'en-us'Frank's Document")
3968
3969 def test_rfc2231_no_extended_values(self):
3970 eq = self.assertEqual
3971 m = """\
3972Content-Type: application/x-foo; name=\"Frank's Document\"
3973
3974"""
3975 msg = email.message_from_string(m)
3976 eq(msg.get_param('name'), "Frank's Document")
3977
3978 def test_rfc2231_encoded_then_unencoded_segments(self):
3979 eq = self.assertEqual
3980 m = """\
3981Content-Type: application/x-foo;
3982\tname*0*=\"us-ascii'en-us'My\";
3983\tname*1=\" Document\";
3984\tname*2*=\" For You\"
3985
3986"""
3987 msg = email.message_from_string(m)
3988 charset, language, s = msg.get_param('name')
3989 eq(charset, 'us-ascii')
3990 eq(language, 'en-us')
3991 eq(s, 'My Document For You')
3992
3993 def test_rfc2231_unencoded_then_encoded_segments(self):
3994 eq = self.assertEqual
3995 m = """\
3996Content-Type: application/x-foo;
3997\tname*0=\"us-ascii'en-us'My\";
3998\tname*1*=\" Document\";
3999\tname*2*=\" For You\"
4000
4001"""
4002 msg = email.message_from_string(m)
4003 charset, language, s = msg.get_param('name')
4004 eq(charset, 'us-ascii')
4005 eq(language, 'en-us')
4006 eq(s, 'My Document For You')
4007
4008
Ezio Melottib3aedd42010-11-20 19:04:17 +00004009
R. David Murraya8f480f2010-01-16 18:30:03 +00004010# Tests to ensure that signed parts of an email are completely preserved, as
4011# required by RFC1847 section 2.1. Note that these are incomplete, because the
4012# email package does not currently always preserve the body. See issue 1670765.
class TestSigned(TestEmailBase):

    # Matches the first MIME part of a message: group 1 is the boundary
    # string, group 2 is the text between the first boundary line and the
    # next line consisting of '--' plus that same boundary.  Compiled once
    # for the whole class instead of on every comparison.
    _first_part_re = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)

    def _msg_and_obj(self, filename):
        # Return (raw text, parsed Message) for the named data file.
        # openfile() already resolves the name against the package's data
        # directory, so no separate file lookup is needed.
        with openfile(filename) as fp:
            original = fp.read()
        msg = email.message_from_string(original)
        return original, msg

    def _first_part(self, text):
        # Return the body of the first MIME part found in text, failing the
        # test with a clear message when no part can be located.
        match = self._first_part_re.search(text)
        self.assertIsNotNone(match, 'no MIME part found in message text')
        return match.group(2)

    def _signed_parts_eq(self, original, result):
        # Extract the first mime part of each message and require the
        # regenerated one to be identical to the original.
        inpart = self._first_part(original)
        outpart = self._first_part(result)
        self.assertEqual(outpart, inpart)

    def test_long_headers_as_string(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)
4045
4046
Ezio Melottib3aedd42010-11-20 19:04:17 +00004047
def _testclasses():
    # Collect every attribute of this module whose name starts with 'Test',
    # in the (sorted) order that dir() reports them.
    this_module = sys.modules[__name__]
    found = []
    for attr_name in dir(this_module):
        if attr_name.startswith('Test'):
            found.append(getattr(this_module, attr_name))
    return found
4051
4052
def suite():
    # Build a single TestSuite holding the tests of every class returned by
    # _testclasses().  unittest.makeSuite() was deprecated in Python 3.11
    # and removed in 3.13, so the tests are loaded through a TestLoader,
    # which is what makeSuite() did with its default arguments.
    loader = unittest.TestLoader()
    all_tests = unittest.TestSuite()
    for testclass in _testclasses():
        all_tests.addTest(loader.loadTestsFromTestCase(testclass))
    return all_tests
4057 return suite
4058
4059
def test_main():
    # Run each collected test class through run_unittest(), one class per
    # call, in the order _testclasses() returns them.
    classes = _testclasses()
    for cls in classes:
        run_unittest(cls)
4063
4064
Ezio Melottib3aedd42010-11-20 19:04:17 +00004065
# When executed as a script, hand control to unittest and name suite() as
# the default test to run.
if __name__ == '__main__':
    unittest.main(defaultTest='suite')