blob: f8bdaa2bbad0d6982745c2c14b7bc3bc6917d41f [file] [log] [blame]
Benjamin Peterson46a99002010-01-09 18:45:30 +00001# Copyright (C) 2001-2010 Python Software Foundation
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002# Contact: email-sig@python.org
3# email package unit tests
4
5import os
R. David Murray719a4492010-11-21 16:53:48 +00006import re
Guido van Rossum8b3febe2007-08-30 01:15:14 +00007import sys
8import time
9import base64
10import difflib
11import unittest
12import warnings
R. David Murray96fd54e2010-10-08 15:55:28 +000013import textwrap
Guido van Rossum8b3febe2007-08-30 01:15:14 +000014
R. David Murray96fd54e2010-10-08 15:55:28 +000015from io import StringIO, BytesIO
Guido van Rossum8b3febe2007-08-30 01:15:14 +000016from itertools import chain
17
18import email
19
20from email.charset import Charset
21from email.header import Header, decode_header, make_header
22from email.parser import Parser, HeaderParser
23from email.generator import Generator, DecodedGenerator
24from email.message import Message
25from email.mime.application import MIMEApplication
26from email.mime.audio import MIMEAudio
27from email.mime.text import MIMEText
28from email.mime.image import MIMEImage
29from email.mime.base import MIMEBase
30from email.mime.message import MIMEMessage
31from email.mime.multipart import MIMEMultipart
32from email import utils
33from email import errors
34from email import encoders
35from email import iterators
36from email import base64mime
37from email import quoprimime
38
R. David Murray96fd54e2010-10-08 15:55:28 +000039from test.support import findfile, run_unittest, unlink
Guido van Rossum8b3febe2007-08-30 01:15:14 +000040from email.test import __file__ as landmark
41
42
# Small string constants used as separators when joining text in the tests.
NL = '\n'
EMPTYSTRING = ''
SPACE = ' '
46
47
Ezio Melottib3aedd42010-11-20 19:04:17 +000048
def openfile(filename, *args, **kws):
    """Open *filename* from the 'data' directory beside the test package.

    The directory is located relative to ``landmark`` (the test package's
    ``__file__``).  Any extra positional and keyword arguments are handed
    to open() unchanged, and the resulting file object is returned.
    """
    data_dir = os.path.join(os.path.dirname(landmark), 'data')
    return open(os.path.join(data_dir, filename), *args, **kws)
52
53
Ezio Melottib3aedd42010-11-20 19:04:17 +000054
Guido van Rossum8b3febe2007-08-30 01:15:14 +000055# Base test class
class TestEmailBase(unittest.TestCase):
    """Base class providing helpers shared by the email test cases."""

    def ndiffAssertEqual(self, first, second):
        """Like assertEqual except use ndiff for readable output."""
        if first != second:
            # Diff the repr() of each line so that differences in
            # whitespace and escapes are visible in the failure message.
            first_lines = list(map(repr, str(first).splitlines()))
            second_lines = list(map(repr, str(second).splitlines()))
            delta = difflib.ndiff(first_lines, second_lines)
            raise self.failureException(NL + NL.join(delta))

    def _msgobj(self, filename):
        """Parse the named test data file and return the message object."""
        with openfile(findfile(filename)) as msg_file:
            parsed = email.message_from_file(msg_file)
        return parsed
70
71
Ezio Melottib3aedd42010-11-20 19:04:17 +000072
Guido van Rossum8b3febe2007-08-30 01:15:14 +000073# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    """Exercise the public API of email.message.Message.

    Comments (rather than docstrings) describe the individual tests so
    that unittest keeps reporting them by method name.
    """

    def test_get_all(self):
        # get_all() returns every value of a header, or the supplied
        # default when the header is missing.
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        eq(msg.get_charset(), None)
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        eq(msg.get_charset(), None)
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        # set_charset() also accepts a plain charset name.
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        # A charset passed to set_payload() is recorded on the message.
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_get_charsets(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        # The argument replaces None in the returned list.
        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_make_boundary(self):
        msg = MIMEMultipart('form-data')
        # Note that when the boundary gets created is an implementation
        # detail and might change.
        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
        # Trigger creation of boundary
        msg.as_string()
        self.assertEqual(msg.items()[0][1][:33],
                         'multipart/form-data; boundary="==')
        # XXX: there ought to be tests of the uniqueness of the boundary, too.

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        with openfile(findfile('msg_46.txt')) as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, True, 0)
        gen.flatten(msg, False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        eq(msg.get_payload(decode=True), None)
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            # Item assignment appends a header rather than replacing it,
            # and lookups return the first match.  Remove the previous
            # value (a no-op on the first pass) so that every spelling of
            # the encoding is really exercised.
            del msg['content-transfer-encoding']
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_decoded_generator(self):
        # Flattening msg_07.txt with DecodedGenerator must produce the
        # text stored in msg_17.txt.
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity.  assertIn reports the header name
        # and the message on failure, unlike assertTrue(x in y).
        for name in ('from', 'From', 'FROM', 'to', 'To', 'TO'):
            self.assertIn(name, msg)

    def test_as_string(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        eq(text, str(msg))
        # With unixfrom=True the envelope line comes first; the remainder
        # must still match the file contents.
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        self.assertTrue(lines[0].startswith('From '))
        eq(text, NL.join(lines[1:]))

    def test_bad_param(self):
        # A parameter without a value is reported as the empty string.
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_filename(), None)

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_boundary(), None)

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        # Without a header argument the (absent) Content-Type is consulted.
        eq(msg.get_params(), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    def test_get_param_liberal(self):
        msg = Message()
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        eq(msg.get_param('quuz', header='x-header'), None)
        eq(msg.get_param('quuz'), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    def test_get_param_with_semis_in_quotes(self):
        # The semicolons inside the quoted value must not split it.
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    def test_get_param_with_quotes(self):
        # Both spellings below produce the same header text; only the
        # Python quoting of the literal differs.
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        msg = email.message_from_string('Header: exists')
        # Header lookup with "in" ignores case; assertIn/assertNotIn show
        # the operands when the check fails.
        for name in ('header', 'Header', 'HEADER'):
            self.assertIn(name, msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        # Setting the parameter again puts it at the end of the list.
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        # A type without a '/' is rejected.
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # Changing the type keeps the existing parameters.
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        # A Content-Type without a '/' falls back to text/plain.
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        # With a duplicated header only the first occurrence is replaced.
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    def test_broken_base64_payload(self):
        # Undecodable base64 is handed back as the raw payload bytes.
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    # Issue 1078919
    def test_ascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    def test_noascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer.ppt")
        self.assertEqual(
            'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
            msg['Content-Disposition'])

    def test_nonascii_add_header_via_triple(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fußballer.ppt'))
        self.assertEqual(
            'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
            msg['Content-Disposition'])

    def test_ascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="windows [filename].ppt")
        self.assertEqual(
            'attachment; filename="windows [filename].ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer [filename].ppt")
        self.assertEqual(
            "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
            msg['Content-Disposition'])

    # Issue 5871: reject an attempt to embed a header inside a header value
    # (header injection attack).
    def test_embeded_header_via_Header_rejected(self):
        msg = Message()
        msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_embeded_header_via_string_rejected(self):
        msg = Message()
        msg['Dummy'] = 'dummy\nX-Injected-Header: test'
        self.assertRaises(errors.HeaderParseError, msg.as_string)
575
Ezio Melottib3aedd42010-11-20 19:04:17 +0000576
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000577# Test the email.encoders module
578class TestEncoders(unittest.TestCase):
579 def test_encode_empty_payload(self):
580 eq = self.assertEqual
581 msg = Message()
582 msg.set_charset('us-ascii')
583 eq(msg['content-transfer-encoding'], '7bit')
584
585 def test_default_cte(self):
586 eq = self.assertEqual
Ezio Melottic303c122010-04-22 11:57:12 +0000587 # 7bit data and the default us-ascii _charset
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000588 msg = MIMEText('hello world')
589 eq(msg['content-transfer-encoding'], '7bit')
Ezio Melottic303c122010-04-22 11:57:12 +0000590 # Similar, but with 8bit data
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000591 msg = MIMEText('hello \xf8 world')
592 eq(msg['content-transfer-encoding'], '8bit')
593 # And now with a different charset
594 msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
595 eq(msg['content-transfer-encoding'], 'quoted-printable')
596
R. David Murraye85200d2010-05-06 01:41:14 +0000597 def test_encode7or8bit(self):
598 # Make sure a charset whose input character set is 8bit but
599 # whose output character set is 7bit gets a transfer-encoding
600 # of 7bit.
601 eq = self.assertEqual
R. David Murray850fc852010-06-03 01:58:28 +0000602 msg = MIMEText('文', _charset='euc-jp')
R. David Murraye85200d2010-05-06 01:41:14 +0000603 eq(msg['content-transfer-encoding'], '7bit')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000604
Ezio Melottib3aedd42010-11-20 19:04:17 +0000605
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000606# Test long header wrapping
607class TestLongHeaders(TestEmailBase):
def test_split_long_continuation(self):
    # A message whose Subject has an over-long continuation line must be
    # flattened back to exactly the text it was parsed from.
    text = """\
Subject: bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text

test
"""
    msg = email.message_from_string(text)
    out = StringIO()
    Generator(out).flatten(msg)
    self.ndiffAssertEqual(out.getvalue(), text)
627
def test_another_long_almost_unsplittable_header(self):
    # In both the tab and the space variants, encode() is expected to
    # return the header text unchanged.
    check = self.ndiffAssertEqual
    tab_folded = """\
bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text"""
    header = Header(tab_folded, continuation_ws='\t')
    check(header.encode(), tab_folded)
    # Same text with every tab turned into a space, default continuation.
    space_folded = tab_folded.replace('\t', ' ')
    header = Header(space_folded)
    check(header.encode(), space_folded)
644
def test_long_nonstring(self):
    # Build one Subject header from three chunks in three different
    # charsets, then check both the flattened message and a direct
    # encode() with maxlinelen=76.
    check = self.ndiffAssertEqual
    latin1 = Charset("iso-8859-1")
    latin2 = Charset("iso-8859-2")
    utf8 = Charset("utf-8")
    german = (b'Die Mieter treten hier ein werden mit einem Foerderband '
              b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
              b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
              b'bef\xf6rdert. ')
    czech = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
             b'd\xf9vtipu.. ')
    japanese = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                '\u3044\u307e\u3059\u3002')
    subject = Header(german, latin1, header_name='Subject')
    subject.append(czech, latin2)
    subject.append(japanese, utf8)
    msg = Message()
    msg['Subject'] = subject
    out = StringIO()
    Generator(out).flatten(msg)
    check(out.getvalue(), """\
Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
 =?utf-8?b?44CC?=

""")
    check(subject.encode(maxlinelen=76), """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000698
def test_long_header_encode(self):
    # The long parameter list is expected to be folded once, after the
    # first parameter.
    value = ('wasnipoop; giraffes="very-long-necked-animals"; '
             'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
    header = Header(value, header_name='X-Foobar-Spoink-Defrobnit')
    folded = '''\
wasnipoop; giraffes="very-long-necked-animals";
 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"'''
    self.ndiffAssertEqual(header.encode(), folded)
707
def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
    # continuation_ws='\t' is requested, yet the value contains no tab,
    # so the folded line is expected to start with the value's own space.
    value = ('wasnipoop; giraffes="very-long-necked-animals"; '
             'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
    header = Header(value,
                    header_name='X-Foobar-Spoink-Defrobnit',
                    continuation_ws='\t')
    folded = '''\
wasnipoop; giraffes="very-long-necked-animals";
 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"'''
    self.ndiffAssertEqual(header.encode(), folded)
717
def test_long_header_encode_with_tab_continuation(self):
    # Here the value itself contains a tab after the first parameter,
    # and the folded line is expected to start with that tab.
    value = ('wasnipoop; giraffes="very-long-necked-animals";\t'
             'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
    header = Header(value,
                    header_name='X-Foobar-Spoink-Defrobnit',
                    continuation_ws='\t')
    folded = '''\
wasnipoop; giraffes="very-long-necked-animals";
\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"'''
    self.ndiffAssertEqual(header.encode(), folded)
727
R David Murray3a6152f2011-03-14 21:13:03 -0400728 def test_header_encode_with_different_output_charset(self):
729 h = Header('文', 'euc-jp')
730 self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=")
731
732 def test_long_header_encode_with_different_output_charset(self):
733 h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4'
734 b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4'
735 b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4'
736 b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp')
737 res = """\
738=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?=
739 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?="""
740 self.assertEqual(h.encode(), res)
741
def test_header_splitter(self):
    # Flatten an empty MIMEText carrying one long custom header and
    # compare the complete generated text.
    msg = MIMEText('')
    # It'd be great if we could use add_header() here, but that doesn't
    # guarantee an order of the parameters.
    long_value = ('wasnipoop; giraffes="very-long-necked-animals"; '
                  'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
    msg['X-Foobar-Spoink-Defrobnit'] = long_value
    out = StringIO()
    Generator(out).flatten(msg)
    self.ndiffAssertEqual(out.getvalue(), '''\
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"

''')
761
762 def test_no_semis_header_splitter(self):
763 eq = self.ndiffAssertEqual
764 msg = Message()
765 msg['From'] = 'test@dom.ain'
766 msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
767 msg.set_payload('Test')
768 sfp = StringIO()
769 g = Generator(sfp)
770 g.flatten(msg)
771 eq(sfp.getvalue(), """\
772From: test@dom.ain
773References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
774 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>
775
776Test""")
777
778 def test_no_split_long_header(self):
779 eq = self.ndiffAssertEqual
780 hstr = 'References: ' + 'x' * 80
Guido van Rossum9604e662007-08-30 03:46:43 +0000781 h = Header(hstr)
782 # These come on two lines because Headers are really field value
783 # classes and don't really know about their field names.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000784 eq(h.encode(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000785References:
786 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
787 h = Header('x' * 80)
788 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000789
790 def test_splitting_multiple_long_lines(self):
791 eq = self.ndiffAssertEqual
792 hstr = """\
793from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
794\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
795\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
796"""
797 h = Header(hstr, continuation_ws='\t')
798 eq(h.encode(), """\
799from babylon.socal-raves.org (localhost [127.0.0.1]);
800 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
801 for <mailman-admin@babylon.socal-raves.org>;
802 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
803\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
804 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
805 for <mailman-admin@babylon.socal-raves.org>;
806 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
807\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
808 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
809 for <mailman-admin@babylon.socal-raves.org>;
810 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")
811
812 def test_splitting_first_line_only_is_long(self):
813 eq = self.ndiffAssertEqual
814 hstr = """\
815from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
816\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
817\tid 17k4h5-00034i-00
818\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
819 h = Header(hstr, maxlinelen=78, header_name='Received',
820 continuation_ws='\t')
821 eq(h.encode(), """\
822from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
823 helo=cthulhu.gerg.ca)
824\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
825\tid 17k4h5-00034i-00
826\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")
827
828 def test_long_8bit_header(self):
829 eq = self.ndiffAssertEqual
830 msg = Message()
831 h = Header('Britische Regierung gibt', 'iso-8859-1',
832 header_name='Subject')
833 h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
Guido van Rossum9604e662007-08-30 03:46:43 +0000834 eq(h.encode(maxlinelen=76), """\
835=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
836 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000837 msg['Subject'] = h
Guido van Rossum9604e662007-08-30 03:46:43 +0000838 eq(msg.as_string(maxheaderlen=76), """\
839Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
840 =?iso-8859-1?q?hore-Windkraftprojekte?=
841
842""")
843 eq(msg.as_string(maxheaderlen=0), """\
844Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000845
846""")
847
848 def test_long_8bit_header_no_charset(self):
849 eq = self.ndiffAssertEqual
850 msg = Message()
Barry Warsaw8c571042007-08-30 19:17:18 +0000851 header_string = ('Britische Regierung gibt gr\xfcnes Licht '
852 'f\xfcr Offshore-Windkraftprojekte '
853 '<a-very-long-address@example.com>')
854 msg['Reply-To'] = header_string
855 self.assertRaises(UnicodeEncodeError, msg.as_string)
856 msg = Message()
857 msg['Reply-To'] = Header(header_string, 'utf-8',
858 header_name='Reply-To')
859 eq(msg.as_string(maxheaderlen=78), """\
860Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
861 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000862
863""")
864
865 def test_long_to_header(self):
866 eq = self.ndiffAssertEqual
867 to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
868 '<someone@eecs.umich.edu>,'
869 '"Someone Test #B" <someone@umich.edu>, '
870 '"Someone Test #C" <someone@eecs.umich.edu>, '
871 '"Someone Test #D" <someone@eecs.umich.edu>')
872 msg = Message()
873 msg['To'] = to
874 eq(msg.as_string(maxheaderlen=78), '''\
Guido van Rossum9604e662007-08-30 03:46:43 +0000875To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
Barry Warsaw70d61ce2009-03-30 23:12:30 +0000876 "Someone Test #B" <someone@umich.edu>,
Guido van Rossum9604e662007-08-30 03:46:43 +0000877 "Someone Test #C" <someone@eecs.umich.edu>,
878 "Someone Test #D" <someone@eecs.umich.edu>
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000879
880''')
881
882 def test_long_line_after_append(self):
883 eq = self.ndiffAssertEqual
884 s = 'This is an example of string which has almost the limit of header length.'
885 h = Header(s)
886 h.append('Add another line.')
Guido van Rossum9604e662007-08-30 03:46:43 +0000887 eq(h.encode(maxlinelen=76), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000888This is an example of string which has almost the limit of header length.
889 Add another line.""")
890
891 def test_shorter_line_with_append(self):
892 eq = self.ndiffAssertEqual
893 s = 'This is a shorter line.'
894 h = Header(s)
895 h.append('Add another sentence. (Surprise?)')
896 eq(h.encode(),
897 'This is a shorter line. Add another sentence. (Surprise?)')
898
899 def test_long_field_name(self):
900 eq = self.ndiffAssertEqual
901 fn = 'X-Very-Very-Very-Long-Header-Name'
Guido van Rossum9604e662007-08-30 03:46:43 +0000902 gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
903 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
904 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
905 'bef\xf6rdert. ')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000906 h = Header(gs, 'iso-8859-1', header_name=fn)
907 # BAW: this seems broken because the first line is too long
Guido van Rossum9604e662007-08-30 03:46:43 +0000908 eq(h.encode(maxlinelen=76), """\
909=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
910 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
911 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
912 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000913
914 def test_long_received_header(self):
915 h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
916 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
917 'Wed, 05 Mar 2003 18:10:18 -0700')
918 msg = Message()
919 msg['Received-1'] = Header(h, continuation_ws='\t')
920 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +0000921 # This should be splitting on spaces not semicolons.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000922 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Barry Warsawbef9d212007-08-31 10:55:37 +0000923Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
924 Wed, 05 Mar 2003 18:10:18 -0700
925Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
926 Wed, 05 Mar 2003 18:10:18 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000927
928""")
929
930 def test_string_headerinst_eq(self):
931 h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
932 'tu-muenchen.de> (David Bremner\'s message of '
933 '"Thu, 6 Mar 2003 13:58:21 +0100")')
934 msg = Message()
935 msg['Received-1'] = Header(h, header_name='Received-1',
936 continuation_ws='\t')
937 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +0000938 # XXX This should be splitting on spaces not commas.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000939 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Barry Warsawbef9d212007-08-31 10:55:37 +0000940Received-1: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner's message of \"Thu,
941 6 Mar 2003 13:58:21 +0100\")
942Received-2: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner's message of \"Thu,
943 6 Mar 2003 13:58:21 +0100\")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000944
945""")
946
947 def test_long_unbreakable_lines_with_continuation(self):
948 eq = self.ndiffAssertEqual
949 msg = Message()
950 t = """\
951iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
952 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
953 msg['Face-1'] = t
954 msg['Face-2'] = Header(t, header_name='Face-2')
Barry Warsawbef9d212007-08-31 10:55:37 +0000955 # XXX This splitting is all wrong. It the first value line should be
956 # snug against the field name.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000957 eq(msg.as_string(maxheaderlen=78), """\
Barry Warsawc5a6a302007-08-31 11:19:21 +0000958Face-1:\x20
Barry Warsaw70d61ce2009-03-30 23:12:30 +0000959 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000960 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Barry Warsawc5a6a302007-08-31 11:19:21 +0000961Face-2:\x20
Barry Warsawbef9d212007-08-31 10:55:37 +0000962 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000963 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
964
965""")
966
967 def test_another_long_multiline_header(self):
968 eq = self.ndiffAssertEqual
969 m = ('Received: from siimage.com '
970 '([172.25.1.3]) by zima.siliconimage.com with '
Guido van Rossum9604e662007-08-30 03:46:43 +0000971 'Microsoft SMTPSVC(5.0.2195.4905); '
972 'Wed, 16 Oct 2002 07:41:11 -0700')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000973 msg = email.message_from_string(m)
974 eq(msg.as_string(maxheaderlen=78), '''\
Barry Warsawbef9d212007-08-31 10:55:37 +0000975Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with Microsoft SMTPSVC(5.0.2195.4905);
976 Wed, 16 Oct 2002 07:41:11 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000977
978''')
979
980 def test_long_lines_with_different_header(self):
981 eq = self.ndiffAssertEqual
982 h = ('List-Unsubscribe: '
983 '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
984 ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
985 '?subject=unsubscribe>')
986 msg = Message()
987 msg['List'] = h
988 msg['List'] = Header(h, header_name='List')
989 eq(msg.as_string(maxheaderlen=78), """\
990List: List-Unsubscribe: <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +0000991 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000992List: List-Unsubscribe: <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +0000993 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000994
995""")
996
R. David Murray6f0022d2011-01-07 21:57:25 +0000997 def test_long_rfc2047_header_with_embedded_fws(self):
998 h = Header(textwrap.dedent("""\
999 We're going to pretend this header is in a non-ascii character set
1000 \tto see if line wrapping with encoded words and embedded
1001 folding white space works"""),
1002 charset='utf-8',
1003 header_name='Test')
1004 self.assertEqual(h.encode()+'\n', textwrap.dedent("""\
1005 =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
1006 =?utf-8?q?cter_set?=
1007 =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
1008 =?utf-8?q?_folding_white_space_works?=""")+'\n')
1009
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001010
Ezio Melottib3aedd42010-11-20 19:04:17 +00001011
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001012# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    # Checks the Generator's mangle_from_ switch against a body whose first
    # line begins with "From ".

    def setUp(self):
        message = Message()
        message['From'] = 'aaa@bbb.org'
        message.set_payload("""\
From the desk of A.A.A.:
Blah blah blah
""")
        self.msg = message

    def _flatten(self, mangle):
        # Flatten self.msg with the given mangle_from_ setting and return
        # the generated text.
        buffer = StringIO()
        Generator(buffer, mangle_from_=mangle).flatten(self.msg)
        return buffer.getvalue()

    def test_mangled_from(self):
        # With mangling on, the body's leading "From " gains a ">" prefix.
        self.assertEqual(self._flatten(True), """\
From: aaa@bbb.org

>From the desk of A.A.A.:
Blah blah blah
""")

    def test_dont_mangle_from(self):
        # With mangling off, the body is emitted unchanged.
        self.assertEqual(self._flatten(False), """\
From: aaa@bbb.org

From the desk of A.A.A.:
Blah blah blah
""")
1043
1044
Ezio Melottib3aedd42010-11-20 19:04:17 +00001045
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001046# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    # Builds a MIMEAudio part from the bundled audiotest.au sample and
    # checks its guessed subtype, payload encoding and header parameters.

    def setUp(self):
        # Make sure we pick up the audiotest.au that lives in email/test/data.
        # In Python, there's an audiotest.au living in Lib/test but that isn't
        # included in some binary distros that don't include the test
        # package.  The trailing empty string on the .join() is significant
        # since findfile() will do a dirname().
        datadir = os.path.join(os.path.dirname(landmark), 'data', '')
        with open(findfile('audiotest.au', datadir), 'rb') as fp:
            self._audiodata = fp.read()
        self._au = MIMEAudio(self._audiodata)

    def test_guess_minor_type(self):
        self.assertEqual(self._au.get_content_type(), 'audio/basic')

    def test_encoding(self):
        # Decoding the base64 payload must give back the original bytes.
        payload = self._au.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._audiodata)

    def test_checkSetMinor(self):
        au = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(au.get_content_type(), 'audio/fish')

    def test_add_header(self):
        eq = self.assertEqual
        same = self.assertIs
        self._au.add_header('Content-Disposition', 'attachment',
                            filename='audiotest.au')
        eq(self._au['content-disposition'],
           'attachment; filename="audiotest.au"')
        eq(self._au.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'audiotest.au')])
        eq(self._au.get_param('filename', header='content-disposition'),
           'audiotest.au')
        # A unique sentinel, so the identity checks below prove that the
        # caller-supplied failobj itself is what came back.
        missing = []
        eq(self._au.get_param('attachment', header='content-disposition'), '')
        same(self._au.get_param('foo', failobj=missing,
                                header='content-disposition'), missing)
        # Try some missing stuff
        same(self._au.get_param('foobar', missing), missing)
        same(self._au.get_param('attachment', missing,
                                header='foobar'), missing)
1090
1091
Ezio Melottib3aedd42010-11-20 19:04:17 +00001092
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001093# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    # Builds a MIMEImage part from the bundled PyBanner048.gif and checks
    # its guessed subtype, payload encoding and header parameters.

    def setUp(self):
        with openfile('PyBanner048.gif', 'rb') as fp:
            self._imgdata = fp.read()
        self._im = MIMEImage(self._imgdata)

    def test_guess_minor_type(self):
        self.assertEqual(self._im.get_content_type(), 'image/gif')

    def test_encoding(self):
        # Decoding the base64 payload must give back the original bytes.
        payload = self._im.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._imgdata)

    def test_checkSetMinor(self):
        im = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(im.get_content_type(), 'image/fish')

    def test_add_header(self):
        eq = self.assertEqual
        same = self.assertIs
        self._im.add_header('Content-Disposition', 'attachment',
                            filename='dingusfish.gif')
        eq(self._im['content-disposition'],
           'attachment; filename="dingusfish.gif"')
        eq(self._im.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'dingusfish.gif')])
        eq(self._im.get_param('filename', header='content-disposition'),
           'dingusfish.gif')
        # A unique sentinel, so the identity checks below prove that the
        # caller-supplied failobj itself is what came back.
        missing = []
        eq(self._im.get_param('attachment', header='content-disposition'), '')
        same(self._im.get_param('foo', failobj=missing,
                                header='content-disposition'), missing)
        # Try some missing stuff
        same(self._im.get_param('foobar', missing), missing)
        same(self._im.get_param('attachment', missing,
                                header='foobar'), missing)
1131
1132
Ezio Melottib3aedd42010-11-20 19:04:17 +00001133
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001134# Test the basic MIMEApplication class
1135class TestMIMEApplication(unittest.TestCase):
1136 def test_headers(self):
1137 eq = self.assertEqual
Barry Warsaw8b2af272007-08-31 03:04:26 +00001138 msg = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001139 eq(msg.get_content_type(), 'application/octet-stream')
1140 eq(msg['content-transfer-encoding'], 'base64')
1141
1142 def test_body(self):
1143 eq = self.assertEqual
Barry Warsaw8c571042007-08-30 19:17:18 +00001144 bytes = b'\xfa\xfb\xfc\xfd\xfe\xff'
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001145 msg = MIMEApplication(bytes)
R. David Murray7da8f062010-06-04 16:11:08 +00001146 eq(msg.get_payload(), '+vv8/f7/')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001147 eq(msg.get_payload(decode=True), bytes)
1148
1149
Ezio Melottib3aedd42010-11-20 19:04:17 +00001150
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001151# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    # Checks content type, charset handling and payload of MIMEText parts.

    def setUp(self):
        self._msg = MIMEText('hello there')

    def test_types(self):
        eq = self.assertEqual
        eq(self._msg.get_content_type(), 'text/plain')
        eq(self._msg.get_param('charset'), 'us-ascii')
        # A unique sentinel, so the identity checks prove that the
        # caller-supplied failobj itself is what came back.
        missing = []
        self.assertIs(self._msg.get_param('foobar', missing), missing)
        self.assertIs(self._msg.get_param('charset', missing,
                                          header='foobar'), missing)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there')
        # get_charset() is compared directly against the charset name here.
        eq(msg.get_charset(), 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        self.assertIn('hello there', msg.as_string())

    def test_utf8_input(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        eq = self.assertEqual
        msg = MIMEText(teststr, _charset='utf-8')
        eq(msg.get_charset().output_charset, 'utf-8')
        eq(msg['content-type'], 'text/plain; charset="utf-8"')
        eq(msg.get_payload(decode=True), teststr.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
        "will fix in email6")
    def test_utf8_input_no_charset(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
1202
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001203
Ezio Melottib3aedd42010-11-20 19:04:17 +00001204
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001205# Test complicated multipart/* messages
class TestMultipart(TestEmailBase):
    # Exercises multipart/* containers: the object hierarchy, how preamble
    # and epilogue values affect the flattened text, and how the parser
    # copes with unusual boundaries.

    def setUp(self):
        with openfile('PyBanner048.gif', 'rb') as fp:
            data = fp.read()
        container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
        image = MIMEImage(data, name='dingusfish.gif')
        image.add_header('content-disposition', 'attachment',
                         filename='dingusfish.gif')
        intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
        container.attach(intro)
        container.attach(image)
        container['From'] = 'Barry <barry@digicool.com>'
        container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
        container['Subject'] = 'Here is your dingus fish'

        now = 987809702.54848599
        timetuple = time.localtime(now)
        # The last field of the struct is the DST flag; pick the matching
        # zone offset.
        if timetuple[-1] == 0:
            tzsecs = time.timezone
        else:
            tzsecs = time.altzone
        # time.timezone / time.altzone count seconds *west* of UTC, so a
        # positive value means a negative UTC offset.
        if tzsecs > 0:
            sign = '-'
        else:
            sign = '+'
        # Format the magnitude as HHMM.  Working from abs() avoids a second
        # minus sign for zones east of UTC, and the divmod keeps offsets
        # that are not a whole number of hours (e.g. 5:30) correct.
        hours, minutes = divmod(abs(tzsecs) // 60, 60)
        tzoffset = ' %s%02d%02d' % (sign, hours, minutes)
        container['Date'] = time.strftime(
            '%a, %d %b %Y %H:%M:%S', timetuple) + tzoffset
        self._msg = container
        self._im = image
        self._txt = intro

    def test_hierarchy(self):
        # convenience
        eq = self.assertEqual
        raises = self.assertRaises
        # tests
        m = self._msg
        self.assertTrue(m.is_multipart())
        eq(m.get_content_type(), 'multipart/mixed')
        eq(len(m.get_payload()), 2)
        raises(IndexError, m.get_payload, 2)
        m0 = m.get_payload(0)
        m1 = m.get_payload(1)
        self.assertIs(m0, self._txt)
        self.assertIs(m1, self._im)
        eq(m.get_payload(), [m0, m1])
        self.assertFalse(m0.is_multipart())
        self.assertFalse(m1.is_multipart())

    def test_empty_multipart_idempotent(self):
        text = """\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY


--BOUNDARY--
"""
        msg = Parser().parsestr(text)
        self.ndiffAssertEqual(text, msg.as_string())

    def test_no_parts_in_a_multipart_with_none_epilogue(self):
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY

--BOUNDARY--''')

    def test_no_parts_in_a_multipart_with_empty_epilogue(self):
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = ''
        outer.epilogue = ''
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY

--BOUNDARY--
''')

    def test_one_part_in_a_multipart(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.set_boundary('BOUNDARY')
        msg = MIMEText('hello world')
        outer.attach(msg)
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_preamble(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')


    def test_seq_parts_in_a_multipart_with_none_preamble(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')


    def test_seq_parts_in_a_multipart_with_none_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')


    def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')


    def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = '\n'
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--

''')

    def test_message_external_body(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_36.txt')
        eq(len(msg.get_payload()), 2)
        msg1 = msg.get_payload(1)
        eq(msg1.get_content_type(), 'multipart/alternative')
        eq(len(msg1.get_payload()), 2)
        for subpart in msg1.get_payload():
            eq(subpart.get_content_type(), 'message/external-body')
            eq(len(subpart.get_payload()), 1)
            subsubpart = subpart.get_payload(0)
            eq(subsubpart.get_content_type(), 'text/plain')

    def test_double_boundary(self):
        # msg_37.txt is a multipart that contains two dash-boundary's in a
        # row.  Our interpretation of RFC 2046 calls for ignoring the second
        # and subsequent boundaries.
        msg = self._msgobj('msg_37.txt')
        self.assertEqual(len(msg.get_payload()), 3)

    def test_nested_inner_contains_outer_boundary(self):
        eq = self.ndiffAssertEqual
        # msg_38.txt has an inner part that contains outer boundaries.  My
        # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
        # these are illegal and should be interpreted as unterminated inner
        # parts.
        msg = self._msgobj('msg_38.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
            text/plain
        text/plain
    text/plain
    text/plain
""")

    def test_nested_with_same_boundary(self):
        eq = self.ndiffAssertEqual
        # msg 39.txt is similarly evil in that it's got inner parts that use
        # the same boundary as outer parts.  Again, I believe the way this is
        # parsed is closest to the spirit of RFC 2046
        msg = self._msgobj('msg_39.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
        application/octet-stream
        application/octet-stream
    text/plain
""")

    def test_boundary_in_non_multipart(self):
        msg = self._msgobj('msg_40.txt')
        self.assertEqual(msg.as_string(), '''\
MIME-Version: 1.0
Content-Type: text/html; boundary="--961284236552522269"

----961284236552522269
Content-Type: text/html;
Content-Transfer-Encoding: 7Bit

<html></html>

----961284236552522269--
''')

    def test_boundary_with_leading_space(self):
        eq = self.assertEqual
        msg = email.message_from_string('''\
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary=" XXXX"

-- XXXX
Content-Type: text/plain


-- XXXX
Content-Type: text/plain

-- XXXX--
''')
        self.assertTrue(msg.is_multipart())
        eq(msg.get_boundary(), ' XXXX')
        eq(len(msg.get_payload()), 2)

    def test_boundary_without_trailing_newline(self):
        m = Parser().parsestr("""\
Content-Type: multipart/mixed; boundary="===============0012394164=="
MIME-Version: 1.0

--===============0012394164==
Content-Type: image/file1.jpg
MIME-Version: 1.0
Content-Transfer-Encoding: base64

YXNkZg==
--===============0012394164==--""")
        self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001577
1578
Ezio Melottib3aedd42010-11-20 19:04:17 +00001579
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001580# Test some badly formatted messages
class TestNonConformant(TestEmailBase):
    """Check what the parser produces for badly formatted messages."""

    def test_parse_missing_minor_type(self):
        # msg_14.txt must be reported as text/plain by every accessor.
        eq = self.assertEqual
        msg = self._msgobj('msg_14.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')

    def test_same_boundary_inner_outer(self):
        msg = self._msgobj('msg_15.txt')
        # XXX We can probably eventually do better
        inner = msg.get_payload(0)
        self.assertTrue(hasattr(inner, 'defects'))
        self.assertEqual(len(inner.defects), 1)
        # assertIsInstance reports the actual type when the check fails.
        self.assertIsInstance(inner.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    def test_multipart_no_boundary(self):
        msg = self._msgobj('msg_25.txt')
        # The payload is expected to be a plain string, not a list of parts.
        self.assertIsInstance(msg.get_payload(), str)
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    def test_invalid_content_type(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        msg = Message()
        # RFC 2045, $5.2 says invalid yields text/plain
        msg['Content-Type'] = 'text'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Clear the old value and try something /really/ invalid
        del msg['content-type']
        msg['Content-Type'] = 'foo'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Still, make sure that the message is idempotently generated
        s = StringIO()
        g = Generator(s)
        g.flatten(msg)
        neq(s.getvalue(), 'Content-Type: foo\n\n')

    def test_no_start_boundary(self):
        # The whole body of msg_31.txt is expected back as one string payload.
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_31.txt')
        eq(msg.get_payload(), """\
--BOUNDARY
Content-Type: text/plain

message 1

--BOUNDARY
Content-Type: text/plain

message 2

--BOUNDARY--
""")

    def test_no_separating_blank_line(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_35.txt')
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: here's something interesting

counter to RFC 2822, there's no separating newline here
""")

    def test_lying_multipart(self):
        msg = self._msgobj('msg_41.txt')
        self.assertTrue(hasattr(msg, 'defects'))
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    def test_missing_start_boundary(self):
        outer = self._msgobj('msg_42.txt')
        # The message structure is:
        #
        # multipart/mixed
        #    text/plain
        #    message/rfc822
        #        multipart/mixed [*]
        #
        # [*] This message is missing its start boundary
        bad = outer.get_payload(1).get_payload(0)
        self.assertEqual(len(bad.defects), 1)
        self.assertIsInstance(bad.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    def test_first_line_is_continuation_header(self):
        eq = self.assertEqual
        # The first line starts with whitespace, i.e. it looks like a
        # continuation line although no header precedes it.
        m = ' Line 1\nLine 2\nLine 3'
        msg = email.message_from_string(m)
        eq(msg.keys(), [])
        eq(msg.get_payload(), 'Line 2\nLine 3')
        eq(len(msg.defects), 1)
        self.assertIsInstance(msg.defects[0],
                              errors.FirstHeaderLineIsContinuationDefect)
        eq(msg.defects[0].line, ' Line 1\n')
1691
1692
Ezio Melottib3aedd42010-11-20 19:04:17 +00001693
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001694# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    """Exercise RFC 2047 encoded-word decoding and header re-encoding."""

    def test_rfc2047_multiline(self):
        # Decode a header spanning two physical lines, rebuild a Header from
        # the pieces, then re-encode it with a 76 column limit.
        raw = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
        pieces = decode_header(raw)
        word = (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')
        self.assertEqual(pieces, [
            (b'Re:', None),
            word,
            (b'baz foo bar', None),
            word,
            ])
        rebuilt = make_header(pieces)
        self.assertEqual(
            str(rebuilt),
            'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        self.ndiffAssertEqual(rebuilt.encode(maxlinelen=76), """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""")

    def test_whitespace_eater_unicode(self):
        raw = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        pieces = decode_header(raw)
        self.assertEqual(pieces, [
            (b'Andr\xe9', 'iso-8859-1'),
            (b'Pirard <pirard@dom.ain>', None),
            ])
        as_text = str(make_header(pieces))
        self.assertEqual(as_text, 'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_eater_unicode_2(self):
        raw = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?='
        pieces = decode_header(raw)
        self.assertEqual(pieces, [
            (b'The', None),
            (b'quick brown fox', 'iso-8859-1'),
            (b'jumped over the', None),
            (b'lazy dog', 'iso-8859-1'),
            ])
        as_text = str(make_header(pieces))
        self.assertEqual(as_text,
                         'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        # Encoded words glued to surrounding text are returned undecoded.
        raw = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        self.assertEqual(decode_header(raw), [(raw, None)])

    def test_rfc2047_with_whitespace(self):
        raw = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        expected = [
            (b'Sm', None),
            (b'\xf6', 'iso-8859-1'),
            (b'rg', None),
            (b'\xe5', 'iso-8859-1'),
            (b'sbord', None),
            ]
        self.assertEqual(decode_header(raw), expected)

    def test_rfc2047_B_bad_padding(self):
        template = '=?iso-8859-1?B?%s?='
        # only test complete bytes
        cases = (
            ('dm==', b'v'),
            ('dm=', b'v'),
            ('dm', b'v'),
            ('dmk=', b'vi'),
            ('dmk', b'vi'),
            )
        for encoded, decoded in cases:
            result = decode_header(template % encoded)
            self.assertEqual(result, [(decoded, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004.
        raw = '=?iso-8659-1?Q?andr=e9=zz?='
        expected = [(b'andr\xe9=zz', 'iso-8659-1')]
        self.assertEqual(decode_header(raw), expected)
1758
Ezio Melottib3aedd42010-11-20 19:04:17 +00001759
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001760# Test the MIMEMessage class
class TestMIMEMessage(TestEmailBase):
    """Tests for MIMEMessage and for parsing/generating message/* parts."""

    def setUp(self):
        # Keep the raw text of msg_11.txt on the instance.
        with openfile('msg_11.txt') as fp:
            self._text = fp.read()

    def test_type_error(self):
        # A plain string is not accepted as the wrapped message.
        self.assertRaises(TypeError, MIMEMessage, 'a plain string')

    def test_valid_argument(self):
        eq = self.assertEqual
        subject = 'A sub-message'
        m = Message()
        m['Subject'] = subject
        r = MIMEMessage(m)
        eq(r.get_content_type(), 'message/rfc822')
        payload = r.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subpart = payload[0]
        # The wrapped message must be the very same object, not a copy.
        self.assertIs(subpart, m)
        eq(subpart['subject'], subject)

    def test_bad_multipart(self):
        msg1 = Message()
        msg1['Subject'] = 'subpart 1'
        msg2 = Message()
        msg2['Subject'] = 'subpart 2'
        r = MIMEMessage(msg1)
        # Attaching a second message to a MIMEMessage must be refused.
        self.assertRaises(errors.MultipartConversionError, r.attach, msg2)

    def test_generate(self):
        # First craft the message to be encapsulated
        m = Message()
        m['Subject'] = 'An enclosed message'
        m.set_payload('Here is the body of the message.\n')
        r = MIMEMessage(m)
        r['Subject'] = 'The enclosing message'
        s = StringIO()
        g = Generator(s)
        g.flatten(r)
        self.assertEqual(s.getvalue(), """\
Content-Type: message/rfc822
MIME-Version: 1.0
Subject: The enclosing message

Subject: An enclosed message

Here is the body of the message.
""")

    def test_parse_message_rfc822(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_11.txt')
        eq(msg.get_content_type(), 'message/rfc822')
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        submsg = payload[0]
        self.assertIsInstance(submsg, Message)
        eq(submsg['subject'], 'An enclosed message')
        eq(submsg.get_payload(), 'Here is the body of the message.\n')

    def test_dsn(self):
        eq = self.assertEqual
        # msg 16 is a Delivery Status Notification, see RFC 1894
        msg = self._msgobj('msg_16.txt')
        eq(msg.get_content_type(), 'multipart/report')
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 3)
        # Subpart 1 is a text/plain, human readable section
        subpart = msg.get_payload(0)
        eq(subpart.get_content_type(), 'text/plain')
        # NOTE(review): the leading whitespace of the indented lines in this
        # literal looks collapsed in the copy under review -- confirm the
        # exact padding against msg_16.txt.
        eq(subpart.get_payload(), """\
This report relates to a message you sent with the following header fields:

 Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
 Date: Sun, 23 Sep 2001 20:10:55 -0700
 From: "Ian T. Henry" <henryi@oxy.edu>
 To: SoCal Raves <scr@socal-raves.org>
 Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

 Recipient address: jangel1@cougar.noc.ucla.edu
 Reason: recipient reached disk quota

""")
        # Subpart 2 contains the machine parsable DSN information.  It
        # consists of two blocks of headers, represented by two nested Message
        # objects.
        subpart = msg.get_payload(1)
        eq(subpart.get_content_type(), 'message/delivery-status')
        eq(len(subpart.get_payload()), 2)
        # message/delivery-status should treat each block as a bunch of
        # headers, i.e. a bunch of Message objects.
        dsn1 = subpart.get_payload(0)
        self.assertIsInstance(dsn1, Message)
        eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
        eq(dsn1.get_param('dns', header='reporting-mta'), '')
        # Try a missing one <wink>
        eq(dsn1.get_param('nsd', header='reporting-mta'), None)
        dsn2 = subpart.get_payload(1)
        self.assertIsInstance(dsn2, Message)
        eq(dsn2['action'], 'failed')
        eq(dsn2.get_params(header='original-recipient'),
           [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
        eq(dsn2.get_param('rfc822', header='final-recipient'), '')
        # Subpart 3 is the original message
        subpart = msg.get_payload(2)
        eq(subpart.get_content_type(), 'message/rfc822')
        payload = subpart.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subsubpart = payload[0]
        self.assertIsInstance(subsubpart, Message)
        eq(subsubpart.get_content_type(), 'text/plain')
        eq(subsubpart['message-id'],
           '<002001c144a6$8752e060$56104586@oxy.edu>')

    def test_epilogue(self):
        # Build by hand the message stored in msg_21.txt and check that the
        # generated text equals the file's contents.
        eq = self.ndiffAssertEqual
        with openfile('msg_21.txt') as fp:
            text = fp.read()
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = 'End of MIME message\n'
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), text)

    def test_no_nl_preamble(self):
        # The preamble has no trailing newline and the epilogue is empty.
        eq = self.ndiffAssertEqual
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = ''
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
""")

    def test_default_type(self):
        eq = self.assertEqual
        with openfile('msg_30.txt') as fp:
            msg = email.message_from_file(fp)
        container1 = msg.get_payload(0)
        eq(container1.get_default_type(), 'message/rfc822')
        eq(container1.get_content_type(), 'message/rfc822')
        container2 = msg.get_payload(1)
        eq(container2.get_default_type(), 'message/rfc822')
        eq(container2.get_content_type(), 'message/rfc822')
        container1a = container1.get_payload(0)
        eq(container1a.get_default_type(), 'text/plain')
        eq(container1a.get_content_type(), 'text/plain')
        container2a = container2.get_payload(0)
        eq(container2a.get_default_type(), 'text/plain')
        eq(container2a.get_content_type(), 'text/plain')

    def test_default_type_with_explicit_container_type(self):
        eq = self.assertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        container1 = msg.get_payload(0)
        eq(container1.get_default_type(), 'message/rfc822')
        eq(container1.get_content_type(), 'message/rfc822')
        container2 = msg.get_payload(1)
        eq(container2.get_default_type(), 'message/rfc822')
        eq(container2.get_content_type(), 'message/rfc822')
        container1a = container1.get_payload(0)
        eq(container1a.get_default_type(), 'text/plain')
        eq(container1a.get_content_type(), 'text/plain')
        container2a = container2.get_payload(0)
        eq(container2a.get_default_type(), 'text/plain')
        eq(container2a.get_content_type(), 'text/plain')

    def test_default_type_non_parsed(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # Set up container
        container = MIMEMultipart('digest', 'BOUNDARY')
        container.epilogue = ''
        # Set up subparts
        subpart1a = MIMEText('message 1\n')
        subpart2a = MIMEText('message 2\n')
        subpart1 = MIMEMessage(subpart1a)
        subpart2 = MIMEMessage(subpart2a)
        container.attach(subpart1)
        container.attach(subpart2)
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
        # With the explicit headers removed the subparts must still report
        # message/rfc822, and the headers disappear from the output.
        del subpart1['content-type']
        del subpart1['mime-version']
        del subpart2['content-type']
        del subpart2['mime-version']
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')

    def test_mime_attachments_in_constructor(self):
        eq = self.assertEqual
        text1 = MIMEText('')
        text2 = MIMEText('')
        msg = MIMEMultipart(_subparts=(text1, text2))
        eq(len(msg.get_payload()), 2)
        eq(msg.get_payload(0), text1)
        eq(msg.get_payload(1), text2)

    def test_default_multipart_constructor(self):
        msg = MIMEMultipart()
        self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002059
Ezio Melottib3aedd42010-11-20 19:04:17 +00002060
# A general test of parser->model->generator idempotency.  IOW, read a message
# in, parse it into a message object tree, then without touching the tree,
# regenerate the plain text.  The original text and the transformed text
# should be identical.  Note that the Unix-From line is left out by default,
# since it may contain a changed date.
class TestIdempotent(TestEmailBase):
    """Parse sample messages and check they regenerate to the same text."""

    # Line separator expected in parsed preambles, epilogues and payloads.
    linesep = '\n'

    def _msgobj(self, filename):
        # Return both the parsed message and the raw text it was parsed from.
        with openfile(filename) as fp:
            data = fp.read()
        msg = email.message_from_string(data)
        return msg, data

    def _idempotent(self, msg, text, unixfrom=False):
        # Flatten msg without header wrapping (maxheaderlen=0) and compare
        # the result with the original text.
        eq = self.ndiffAssertEqual
        s = StringIO()
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg, unixfrom=unixfrom)
        eq(text, s.getvalue())

    def test_parse_text_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_01.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_params()[1], ('charset', 'us-ascii'))
        eq(msg.get_param('charset'), 'us-ascii')
        self.assertIsNone(msg.preamble)
        self.assertIsNone(msg.epilogue)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_03.txt')
        eq(msg.get_content_type(), 'text/plain')
        self.assertIsNone(msg.get_params())
        self.assertIsNone(msg.get_param('charset'))
        self._idempotent(msg, text)

    def test_simple_multipart(self):
        msg, text = self._msgobj('msg_04.txt')
        self._idempotent(msg, text)

    def test_MIME_digest(self):
        msg, text = self._msgobj('msg_02.txt')
        self._idempotent(msg, text)

    def test_long_header(self):
        msg, text = self._msgobj('msg_27.txt')
        self._idempotent(msg, text)

    def test_MIME_digest_with_part_headers(self):
        msg, text = self._msgobj('msg_28.txt')
        self._idempotent(msg, text)

    def test_mixed_with_image(self):
        msg, text = self._msgobj('msg_06.txt')
        self._idempotent(msg, text)

    def test_multipart_report(self):
        msg, text = self._msgobj('msg_05.txt')
        self._idempotent(msg, text)

    def test_dsn(self):
        msg, text = self._msgobj('msg_16.txt')
        self._idempotent(msg, text)

    def test_preamble_epilogue(self):
        msg, text = self._msgobj('msg_21.txt')
        self._idempotent(msg, text)

    def test_multipart_one_part(self):
        msg, text = self._msgobj('msg_23.txt')
        self._idempotent(msg, text)

    def test_multipart_no_parts(self):
        msg, text = self._msgobj('msg_24.txt')
        self._idempotent(msg, text)

    def test_no_start_boundary(self):
        msg, text = self._msgobj('msg_31.txt')
        self._idempotent(msg, text)

    def test_rfc2231_charset(self):
        msg, text = self._msgobj('msg_32.txt')
        self._idempotent(msg, text)

    def test_more_rfc2231_parameters(self):
        msg, text = self._msgobj('msg_33.txt')
        self._idempotent(msg, text)

    def test_text_plain_in_a_multipart_digest(self):
        msg, text = self._msgobj('msg_34.txt')
        self._idempotent(msg, text)

    def test_nested_multipart_mixeds(self):
        msg, text = self._msgobj('msg_12a.txt')
        self._idempotent(msg, text)

    def test_message_external_body_idempotent(self):
        msg, text = self._msgobj('msg_36.txt')
        self._idempotent(msg, text)

    def test_message_delivery_status(self):
        # This is the one case that also regenerates the Unix-From line.
        msg, text = self._msgobj('msg_43.txt')
        self._idempotent(msg, text, unixfrom=True)

    def test_message_signed_idempotent(self):
        msg, text = self._msgobj('msg_45.txt')
        self._idempotent(msg, text)

    def test_content_type(self):
        eq = self.assertEqual
        # Get a message object and reset the seek pointer for other tests
        msg, text = self._msgobj('msg_05.txt')
        eq(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(msg.get_params())
        eq(params['report-type'], 'delivery-status')
        eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep)
        eq(msg.epilogue, self.linesep)
        eq(len(msg.get_payload()), 3)
        # Make sure the subparts are what we expect
        msg1 = msg.get_payload(0)
        eq(msg1.get_content_type(), 'text/plain')
        eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg2 = msg.get_payload(1)
        eq(msg2.get_content_type(), 'text/plain')
        eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg3 = msg.get_payload(2)
        eq(msg3.get_content_type(), 'message/rfc822')
        self.assertIsInstance(msg3, Message)
        payload = msg3.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg4 = payload[0]
        self.assertIsInstance(msg4, Message)
        eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep)

    def test_parser(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        eq(msg.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and that
        # that submessage has a type of text/plain
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg1 = payload[0]
        self.assertIsInstance(msg1, Message)
        eq(msg1.get_content_type(), 'text/plain')
        self.assertIsInstance(msg1.get_payload(), str)
        eq(msg1.get_payload(), self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002223
2224
Ezio Melottib3aedd42010-11-20 19:04:17 +00002225
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002226# Test various other bits of the package's functionality
2227class TestMiscellaneous(TestEmailBase):
def test_message_from_string(self):
    # Parse msg_01.txt from a string and check that flattening the result
    # gives back the original text.
    with openfile('msg_01.txt') as fp:
        original = fp.read()
    parsed = email.message_from_string(original)
    out = StringIO()
    # Don't wrap/continue long headers since we're trying to test
    # idempotency.
    Generator(out, maxheaderlen=0).flatten(parsed)
    self.assertEqual(original, out.getvalue())
2238
def test_message_from_file(self):
    # Same round trip as the string variant, but parsing from the open file.
    with openfile('msg_01.txt') as fp:
        original = fp.read()
        # Rewind so the parser sees the file from its beginning.
        fp.seek(0)
        parsed = email.message_from_file(fp)
    out = StringIO()
    # Don't wrap/continue long headers since we're trying to test
    # idempotency.
    Generator(out, maxheaderlen=0).flatten(parsed)
    self.assertEqual(original, out.getvalue())
2250
def test_message_from_string_with_class(self):
    # message_from_string() must build the tree out of the class it is given.
    with openfile('msg_01.txt') as fp:
        text = fp.read()

    # Create a subclass
    class MyMessage(Message):
        pass

    msg = email.message_from_string(text, MyMessage)
    # assertIsInstance reports the offending type when the check fails.
    self.assertIsInstance(msg, MyMessage)
    # Try something more complicated
    with openfile('msg_02.txt') as fp:
        text = fp.read()
    msg = email.message_from_string(text, MyMessage)
    for subpart in msg.walk():
        self.assertIsInstance(subpart, MyMessage)
2268
def test_message_from_file_with_class(self):
    # message_from_file() must build the tree out of the class it is given.

    # Create a subclass
    class MyMessage(Message):
        pass

    with openfile('msg_01.txt') as fp:
        msg = email.message_from_file(fp, MyMessage)
    # assertIsInstance reports the offending type when the check fails.
    self.assertIsInstance(msg, MyMessage)
    # Try something more complicated
    with openfile('msg_02.txt') as fp:
        msg = email.message_from_file(fp, MyMessage)
    for subpart in msg.walk():
        self.assertIsInstance(subpart, MyMessage)
2283
2284 def test__all__(self):
2285 module = __import__('email')
2286 # Can't use sorted() here due to Python 2.3 compatibility
2287 all = module.__all__[:]
2288 all.sort()
2289 self.assertEqual(all, [
2290 'base64mime', 'charset', 'encoders', 'errors', 'generator',
R. David Murray96fd54e2010-10-08 15:55:28 +00002291 'header', 'iterators', 'message', 'message_from_binary_file',
2292 'message_from_bytes', 'message_from_file',
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002293 'message_from_string', 'mime', 'parser',
2294 'quoprimime', 'utils',
2295 ])
2296
2297 def test_formatdate(self):
2298 now = time.time()
2299 self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
2300 time.gmtime(now)[:6])
2301
2302 def test_formatdate_localtime(self):
2303 now = time.time()
2304 self.assertEqual(
2305 utils.parsedate(utils.formatdate(now, localtime=True))[:6],
2306 time.localtime(now)[:6])
2307
2308 def test_formatdate_usegmt(self):
2309 now = time.time()
2310 self.assertEqual(
2311 utils.formatdate(now, localtime=False),
2312 time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
2313 self.assertEqual(
2314 utils.formatdate(now, localtime=False, usegmt=True),
2315 time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
2316
2317 def test_parsedate_none(self):
2318 self.assertEqual(utils.parsedate(''), None)
2319
2320 def test_parsedate_compact(self):
2321 # The FWS after the comma is optional
2322 self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
2323 utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
2324
2325 def test_parsedate_no_dayofweek(self):
2326 eq = self.assertEqual
2327 eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
2328 (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
2329
2330 def test_parsedate_compact_no_dayofweek(self):
2331 eq = self.assertEqual
2332 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
2333 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2334
R. David Murray4a62e892010-12-23 20:35:46 +00002335 def test_parsedate_no_space_before_positive_offset(self):
2336 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
2337 (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
2338
2339 def test_parsedate_no_space_before_negative_offset(self):
2340 # Issue 1155362: we already handled '+' for this case.
2341 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
2342 (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))
2343
2344
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002345 def test_parsedate_acceptable_to_time_functions(self):
2346 eq = self.assertEqual
2347 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
2348 t = int(time.mktime(timetup))
2349 eq(time.localtime(t)[:6], timetup[:6])
2350 eq(int(time.strftime('%Y', timetup)), 2003)
2351 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
2352 t = int(time.mktime(timetup[:9]))
2353 eq(time.localtime(t)[:6], timetup[:6])
2354 eq(int(time.strftime('%Y', timetup[:9])), 2003)
2355
R. David Murray219d1c82010-08-25 00:45:55 +00002356 def test_parsedate_y2k(self):
2357 """Test for parsing a date with a two-digit year.
2358
2359 Parsing a date with a two-digit year should return the correct
2360 four-digit year. RFC822 allows two-digit years, but RFC2822 (which
2361 obsoletes RFC822) requires four-digit years.
2362
2363 """
2364 self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
2365 utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
2366 self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
2367 utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
2368
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002369 def test_parseaddr_empty(self):
2370 self.assertEqual(utils.parseaddr('<>'), ('', ''))
2371 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
2372
2373 def test_noquote_dump(self):
2374 self.assertEqual(
2375 utils.formataddr(('A Silly Person', 'person@dom.ain')),
2376 'A Silly Person <person@dom.ain>')
2377
2378 def test_escape_dump(self):
2379 self.assertEqual(
2380 utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
2381 r'"A \(Very\) Silly Person" <person@dom.ain>')
2382 a = r'A \(Special\) Person'
2383 b = 'person@dom.ain'
2384 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2385
2386 def test_escape_backslashes(self):
2387 self.assertEqual(
2388 utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
2389 r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
2390 a = r'Arthur \Backslash\ Foobar'
2391 b = 'person@dom.ain'
2392 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2393
2394 def test_name_with_dot(self):
2395 x = 'John X. Doe <jxd@example.com>'
2396 y = '"John X. Doe" <jxd@example.com>'
2397 a, b = ('John X. Doe', 'jxd@example.com')
2398 self.assertEqual(utils.parseaddr(x), (a, b))
2399 self.assertEqual(utils.parseaddr(y), (a, b))
2400 # formataddr() quotes the name if there's a dot in it
2401 self.assertEqual(utils.formataddr((a, b)), y)
2402
R. David Murray5397e862010-10-02 15:58:26 +00002403 def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
2404 # issue 10005. Note that in the third test the second pair of
2405 # backslashes is not actually a quoted pair because it is not inside a
2406 # comment or quoted string: the address being parsed has a quoted
2407 # string containing a quoted backslash, followed by 'example' and two
2408 # backslashes, followed by another quoted string containing a space and
2409 # the word 'example'. parseaddr copies those two backslashes
2410 # literally. Per rfc5322 this is not technically correct since a \ may
2411 # not appear in an address outside of a quoted string. It is probably
2412 # a sensible Postel interpretation, though.
2413 eq = self.assertEqual
2414 eq(utils.parseaddr('""example" example"@example.com'),
2415 ('', '""example" example"@example.com'))
2416 eq(utils.parseaddr('"\\"example\\" example"@example.com'),
2417 ('', '"\\"example\\" example"@example.com'))
2418 eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
2419 ('', '"\\\\"example\\\\" example"@example.com'))
2420
def test_parseaddr_preserves_spaces_in_local_part(self):
    # issue 9286.  A normal RFC5322 local part should not contain any
    # folding white space, but legacy local parts can (they are a sequence
    # of atoms, not dotatoms).  On the other hand we strip whitespace from
    # before the @ and around dots, on the assumption that the whitespace
    # around the punctuation is a mistake in what would otherwise be
    # an RFC5322 local part.  Leading whitespace is, as usual, stripped as
    # well.
    # NOTE(review): as written, the first two assertions are identical.
    # The literals here are whitespace-sensitive, so confirm whether any of
    # them was meant to contain runs of more than one space.
    self.assertEqual(('', "merwok wok@xample.com"),
        utils.parseaddr("merwok wok@xample.com"))
    self.assertEqual(('', "merwok wok@xample.com"),
        utils.parseaddr("merwok wok@xample.com"))
    self.assertEqual(('', "merwok wok@xample.com"),
        utils.parseaddr(" merwok wok @xample.com"))
    self.assertEqual(('', 'merwok"wok" wok@xample.com'),
        utils.parseaddr('merwok"wok" wok@xample.com'))
    self.assertEqual(('', 'merwok.wok.wok@xample.com'),
        utils.parseaddr('merwok. wok . wok@xample.com'))
2438
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002439 def test_multiline_from_comment(self):
2440 x = """\
2441Foo
2442\tBar <foo@example.com>"""
2443 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
2444
2445 def test_quote_dump(self):
2446 self.assertEqual(
2447 utils.formataddr(('A Silly; Person', 'person@dom.ain')),
2448 r'"A Silly; Person" <person@dom.ain>')
2449
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002450 def test_charset_richcomparisons(self):
2451 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002452 ne = self.assertNotEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002453 cset1 = Charset()
2454 cset2 = Charset()
2455 eq(cset1, 'us-ascii')
2456 eq(cset1, 'US-ASCII')
2457 eq(cset1, 'Us-AsCiI')
2458 eq('us-ascii', cset1)
2459 eq('US-ASCII', cset1)
2460 eq('Us-AsCiI', cset1)
2461 ne(cset1, 'usascii')
2462 ne(cset1, 'USASCII')
2463 ne(cset1, 'UsAsCiI')
2464 ne('usascii', cset1)
2465 ne('USASCII', cset1)
2466 ne('UsAsCiI', cset1)
2467 eq(cset1, cset2)
2468 eq(cset2, cset1)
2469
2470 def test_getaddresses(self):
2471 eq = self.assertEqual
2472 eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
2473 'Bud Person <bperson@dom.ain>']),
2474 [('Al Person', 'aperson@dom.ain'),
2475 ('Bud Person', 'bperson@dom.ain')])
2476
2477 def test_getaddresses_nasty(self):
2478 eq = self.assertEqual
2479 eq(utils.getaddresses(['foo: ;']), [('', '')])
2480 eq(utils.getaddresses(
2481 ['[]*-- =~$']),
2482 [('', ''), ('', ''), ('', '*--')])
2483 eq(utils.getaddresses(
2484 ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
2485 [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
2486
2487 def test_getaddresses_embedded_comment(self):
2488 """Test proper handling of a nested comment"""
2489 eq = self.assertEqual
2490 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
2491 eq(addrs[0][1], 'foo@bar.com')
2492
2493 def test_utils_quote_unquote(self):
2494 eq = self.assertEqual
2495 msg = Message()
2496 msg.add_header('content-disposition', 'attachment',
2497 filename='foo\\wacky"name')
2498 eq(msg.get_filename(), 'foo\\wacky"name')
2499
2500 def test_get_body_encoding_with_bogus_charset(self):
2501 charset = Charset('not a charset')
2502 self.assertEqual(charset.get_body_encoding(), 'base64')
2503
2504 def test_get_body_encoding_with_uppercase_charset(self):
2505 eq = self.assertEqual
2506 msg = Message()
2507 msg['Content-Type'] = 'text/plain; charset=UTF-8'
2508 eq(msg['content-type'], 'text/plain; charset=UTF-8')
2509 charsets = msg.get_charsets()
2510 eq(len(charsets), 1)
2511 eq(charsets[0], 'utf-8')
2512 charset = Charset(charsets[0])
2513 eq(charset.get_body_encoding(), 'base64')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00002514 msg.set_payload(b'hello world', charset=charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002515 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
2516 eq(msg.get_payload(decode=True), b'hello world')
2517 eq(msg['content-transfer-encoding'], 'base64')
2518 # Try another one
2519 msg = Message()
2520 msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
2521 charsets = msg.get_charsets()
2522 eq(len(charsets), 1)
2523 eq(charsets[0], 'us-ascii')
2524 charset = Charset(charsets[0])
2525 eq(charset.get_body_encoding(), encoders.encode_7or8bit)
2526 msg.set_payload('hello world', charset=charset)
2527 eq(msg.get_payload(), 'hello world')
2528 eq(msg['content-transfer-encoding'], '7bit')
2529
2530 def test_charsets_case_insensitive(self):
2531 lc = Charset('us-ascii')
2532 uc = Charset('US-ASCII')
2533 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
2534
def test_partial_falls_inside_message_delivery_status(self):
    """The structure of msg_43.txt is parsed correctly."""
    # The Parser interface provides chunks of data to FeedParser in 8192
    # byte gulps.  SF bug #1076485 found one of those chunks inside
    # message/delivery-status header block, which triggered an
    # unreadline() of NeedMoreData.
    msg = self._msgobj('msg_43.txt')
    structure = StringIO()
    iterators._structure(msg, structure)
    # NOTE(review): the nesting below (four spaces per level, with the 26
    # text/plain parts under message/delivery-status) is reconstructed;
    # confirm it against the output of iterators._structure().
    expected_lines = (
        ['multipart/report',
         '    text/plain',
         '    message/delivery-status']
        + ['        text/plain'] * 26
        + ['    text/rfc822-headers']
    )
    expected = ''.join(line + '\n' for line in expected_lines)
    self.ndiffAssertEqual(structure.getvalue(), expected)
2576
R. David Murraya0b44b52010-12-02 21:47:19 +00002577 def test_make_msgid_domain(self):
2578 self.assertEqual(
2579 email.utils.make_msgid(domain='testdomain-string')[-19:],
2580 '@testdomain-string>')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002581
Ezio Melottib3aedd42010-11-20 19:04:17 +00002582
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002583# Test the iterator/generators
class TestIterators(TestEmailBase):
    """Tests for email.iterators and for BufferedSubFile line splitting."""

    def test_body_line_iterator(self):
        """body_line_iterator() yields all body lines of a message."""
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # First a simple non-multipart message
        msg = self._msgobj('msg_01.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 6)
        neq(EMPTYSTRING.join(lines), msg.get_payload())
        # Now a more complicated multipart
        msg = self._msgobj('msg_02.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 43)
        with openfile('msg_19.txt') as fp:
            neq(EMPTYSTRING.join(lines), fp.read())

    def test_typed_subpart_iterator(self):
        """typed_subpart_iterator(msg, 'text') finds both text subparts."""
        eq = self.assertEqual
        msg = self._msgobj('msg_04.txt')
        payloads = [
            subpart.get_payload()
            for subpart in iterators.typed_subpart_iterator(msg, 'text')]
        eq(len(payloads), 2)
        eq(EMPTYSTRING.join(payloads), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        """Asking for 'text'/'plain' in msg_03.txt yields exactly one part."""
        eq = self.assertEqual
        msg = self._msgobj('msg_03.txt')
        payloads = [
            subpart.get_payload()
            for subpart in iterators.typed_subpart_iterator(
                msg, 'text', 'plain')]
        eq(len(payloads), 1)
        eq(EMPTYSTRING.join(payloads), """\

Hi,

Do you like this message?

-Me
""")

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
           line endings.  A CR ending one push() followed by a LF starting
           the next push() added an empty line.
        '''
        # Each entry is (text to push, number of lines that must become
        # readable immediately after that push).
        pushes = [
            ("a\r \n", 2),
            ("b", 0),
            ("c\n", 1),
            ("", 0),
            ("d\r\n", 1),
            ("e\r", 0),
            ("\nf", 1),
            ("\r\n", 1),
        ]
        from email.feedparser import BufferedSubFile, NeedMoreData
        bsf = BufferedSubFile()
        output = []
        expected_total = 0
        for chunk, expected_lines in pushes:
            bsf.push(chunk)
            expected_total += expected_lines
            available = 0
            while True:
                line = bsf.readline()
                # NeedMoreData is a sentinel, so compare by identity.
                if line is NeedMoreData:
                    break
                output.append(line)
                available += 1
            # assertEqual reports both values on failure.
            self.assertEqual(expected_lines, available)
        self.assertEqual(len(output), expected_total)
        # Nothing may be lost or invented: the output is exactly the input.
        self.assertEqual(''.join(chunk for chunk, _ in pushes),
                         ''.join(output))
2670
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002671
Ezio Melottib3aedd42010-11-20 19:04:17 +00002672
class TestParsers(TestEmailBase):
    """Tests for parsing messages from strings and files."""

    # Show complete diffs when a comparison fails.
    maxDiff = None

    def test_header_parser(self):
        """HeaderParser reads the headers and leaves the body as one string."""
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt') as fp:
            msg = HeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)

    def test_whitespace_continuation(self):
        """A whitespace-only line after a header continues that header."""
        eq = self.assertEqual
        # This message contains a line after the Subject: header that has only
        # whitespace, but it is not empty!
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: the next line has a space on it
\x20
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        """Same as above, with the continued header being the last one."""
        eq = self.assertEqual
        # Like the previous test, but the subject line is the last
        # header.
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam
Subject: the next line has a space on it
\x20

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        """Parts of a CRLF-delimited multipart message are split correctly."""
        eq = self.assertEqual
        with openfile('msg_26.txt', newline='\n') as fp:
            msg = Parser().parse(fp)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'text/plain')
        eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'application/riscos')

    def test_crlf_flatten(self):
        """Flattening with linesep='\\r\\n' reproduces the CRLF input."""
        # Using newline='\n' preserves the crlfs in this input file.
        with openfile('msg_26.txt', newline='\n') as fp:
            text = fp.read()
        msg = email.message_from_string(text)
        s = StringIO()
        g = Generator(s)
        g.flatten(msg, linesep='\r\n')
        self.assertEqual(s.getvalue(), text)

    def test_multipart_digest_with_extra_mime_headers(self):
        """Each message/rfc822 part of the digest wraps one text/plain part."""
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 2)
        for index, body in enumerate(('message 1\n', 'message 2\n')):
            part = msg.get_payload(index)
            eq(part.get_content_type(), 'message/rfc822')
            self.assertTrue(part.is_multipart())
            eq(len(part.get_payload()), 1)
            inner = part.get_payload(0)
            self.assertFalse(inner.is_multipart())
            eq(inner.get_content_type(), 'text/plain')
            neq(inner.get_payload(), body)

    def test_three_lines(self):
        """Headers are parsed although the From: value is malformed."""
        # A bug report by Andrew McNamara
        lines = ['From: Andrew Person <aperson@dom.ain',
                 'Subject: Test',
                 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
        msg = email.message_from_string(NL.join(lines))
        self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        """Header values must not keep the CRLF that terminated their line."""
        eq = self.assertEqual
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        value1 = 'text'
        value2 = 'more text'
        m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            value1, value2)
        msg = email.message_from_string(m)
        eq(msg.get('Header'), value1)
        eq(msg.get('Next-Header'), value2)

    def test_rfc2822_header_syntax(self):
        """Unusual but legal header names are accepted."""
        eq = self.assertEqual
        m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg), 3)
        eq(sorted(field for field in msg), ['!"#QUX;~', '>From', 'From'])
        eq(msg.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        """A first line with a space before any colon yields no headers."""
        eq = self.assertEqual
        m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg.keys()), 0)

    def test_rfc2822_one_character_header(self):
        """Single-character header names are accepted."""
        eq = self.assertEqual
        m = 'A: first header\nB: second header\nCC: third header\n\nbody'
        msg = email.message_from_string(m)
        # sorted() works on any iterable of names, not only on a list.
        eq(sorted(msg.keys()), ['A', 'B', 'CC'])
        eq(msg.get_payload(), 'body')

    def test_CRLFLF_at_end_of_part(self):
        """A part body ending in CRLF before a blank LF line keeps its CRLF."""
        # issue 5610: feedparser should not eat two chars from body part ending
        # with "\r\n\n".
        m = (
            "From: foo@bar.com\n"
            "To: baz\n"
            "Mime-Version: 1.0\n"
            "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
            "\n"
            "--BOUNDARY\n"
            "Content-Type: text/plain\n"
            "\n"
            "body ending with CRLF newline\r\n"
            "\n"
            "--BOUNDARY--\n"
          )
        msg = email.message_from_string(m)
        self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002835
Ezio Melottib3aedd42010-11-20 19:04:17 +00002836
R. David Murray96fd54e2010-10-08 15:55:28 +00002837class Test8BitBytesHandling(unittest.TestCase):
2838 # In Python3 all input is string, but that doesn't work if the actual input
2839 # uses an 8bit transfer encoding. To hack around that, in email 5.1 we
2840 # decode byte streams using the surrogateescape error handler, and
2841 # reconvert to binary at appropriate places if we detect surrogates. This
2842 # doesn't allow us to transform headers with 8bit bytes (they get munged),
2843 # but it does allow us to parse and preserve them, and to decode body
2844 # parts that use an 8bit CTE.
2845
2846 bodytest_msg = textwrap.dedent("""\
2847 From: foo@bar.com
2848 To: baz
2849 Mime-Version: 1.0
2850 Content-Type: text/plain; charset={charset}
2851 Content-Transfer-Encoding: {cte}
2852
2853 {bodyline}
2854 """)
2855
2856 def test_known_8bit_CTE(self):
2857 m = self.bodytest_msg.format(charset='utf-8',
2858 cte='8bit',
2859 bodyline='pöstal').encode('utf-8')
2860 msg = email.message_from_bytes(m)
2861 self.assertEqual(msg.get_payload(), "pöstal\n")
2862 self.assertEqual(msg.get_payload(decode=True),
2863 "pöstal\n".encode('utf-8'))
2864
2865 def test_unknown_8bit_CTE(self):
2866 m = self.bodytest_msg.format(charset='notavalidcharset',
2867 cte='8bit',
2868 bodyline='pöstal').encode('utf-8')
2869 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00002870 self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n")
R. David Murray96fd54e2010-10-08 15:55:28 +00002871 self.assertEqual(msg.get_payload(decode=True),
2872 "pöstal\n".encode('utf-8'))
2873
2874 def test_8bit_in_quopri_body(self):
2875 # This is non-RFC compliant data...without 'decode' the library code
2876 # decodes the body using the charset from the headers, and because the
2877 # source byte really is utf-8 this works. This is likely to fail
2878 # against real dirty data (ie: produce mojibake), but the data is
2879 # invalid anyway so it is as good a guess as any. But this means that
2880 # this test just confirms the current behavior; that behavior is not
2881 # necessarily the best possible behavior. With 'decode' it is
2882 # returning the raw bytes, so that test should be of correct behavior,
2883 # or at least produce the same result that email4 did.
2884 m = self.bodytest_msg.format(charset='utf-8',
2885 cte='quoted-printable',
2886 bodyline='p=C3=B6stál').encode('utf-8')
2887 msg = email.message_from_bytes(m)
2888 self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
2889 self.assertEqual(msg.get_payload(decode=True),
2890 'pöstál\n'.encode('utf-8'))
2891
2892 def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
2893 # This is similar to the previous test, but proves that if the 8bit
2894 # byte is undecodeable in the specified charset, it gets replaced
2895 # by the unicode 'unknown' character. Again, this may or may not
2896 # be the ideal behavior. Note that if decode=False none of the
2897 # decoders will get involved, so this is the only test we need
2898 # for this behavior.
2899 m = self.bodytest_msg.format(charset='ascii',
2900 cte='quoted-printable',
2901 bodyline='p=C3=B6stál').encode('utf-8')
2902 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00002903 self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
R. David Murray96fd54e2010-10-08 15:55:28 +00002904 self.assertEqual(msg.get_payload(decode=True),
2905 'pöstál\n'.encode('utf-8'))
2906
2907 def test_8bit_in_base64_body(self):
2908 # Sticking an 8bit byte in a base64 block makes it undecodable by
2909 # normal means, so the block is returned undecoded, but as bytes.
2910 m = self.bodytest_msg.format(charset='utf-8',
2911 cte='base64',
2912 bodyline='cMO2c3RhbAá=').encode('utf-8')
2913 msg = email.message_from_bytes(m)
2914 self.assertEqual(msg.get_payload(decode=True),
2915 'cMO2c3RhbAá=\n'.encode('utf-8'))
2916
2917 def test_8bit_in_uuencode_body(self):
2918 # Sticking an 8bit byte in a uuencode block makes it undecodable by
2919 # normal means, so the block is returned undecoded, but as bytes.
2920 m = self.bodytest_msg.format(charset='utf-8',
2921 cte='uuencode',
2922 bodyline='<,.V<W1A; á ').encode('utf-8')
2923 msg = email.message_from_bytes(m)
2924 self.assertEqual(msg.get_payload(decode=True),
2925 '<,.V<W1A; á \n'.encode('utf-8'))
2926
2927
R. David Murray92532142011-01-07 23:25:30 +00002928 headertest_headers = (
2929 ('From: foo@bar.com', ('From', 'foo@bar.com')),
2930 ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
2931 ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
2932 '\tJean de Baddie',
2933 ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
2934 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
2935 ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
2936 ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
2937 )
2938 headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) +
2939 '\nYes, they are flying.\n').encode('utf-8')
R. David Murray96fd54e2010-10-08 15:55:28 +00002940
2941 def test_get_8bit_header(self):
2942 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00002943 self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz')
2944 self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz')
R. David Murray96fd54e2010-10-08 15:55:28 +00002945
2946 def test_print_8bit_headers(self):
2947 msg = email.message_from_bytes(self.headertest_msg)
2948 self.assertEqual(str(msg),
R. David Murray92532142011-01-07 23:25:30 +00002949 textwrap.dedent("""\
2950 From: {}
2951 To: {}
2952 Subject: {}
2953 From: {}
2954
2955 Yes, they are flying.
2956 """).format(*[expected[1] for (_, expected) in
2957 self.headertest_headers]))
R. David Murray96fd54e2010-10-08 15:55:28 +00002958
2959 def test_values_with_8bit_headers(self):
2960 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00002961 self.assertListEqual([str(x) for x in msg.values()],
R. David Murray96fd54e2010-10-08 15:55:28 +00002962 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00002963 'b\uFFFD\uFFFDz',
2964 'Maintenant je vous pr\uFFFD\uFFFDsente mon '
2965 'coll\uFFFD\uFFFDgue, le pouf '
2966 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
R. David Murray96fd54e2010-10-08 15:55:28 +00002967 '\tJean de Baddie',
R. David Murray92532142011-01-07 23:25:30 +00002968 "g\uFFFD\uFFFDst"])
R. David Murray96fd54e2010-10-08 15:55:28 +00002969
2970 def test_items_with_8bit_headers(self):
2971 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00002972 self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()],
R. David Murray96fd54e2010-10-08 15:55:28 +00002973 [('From', 'foo@bar.com'),
R. David Murray92532142011-01-07 23:25:30 +00002974 ('To', 'b\uFFFD\uFFFDz'),
2975 ('Subject', 'Maintenant je vous '
2976 'pr\uFFFD\uFFFDsente '
2977 'mon coll\uFFFD\uFFFDgue, le pouf '
2978 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
2979 '\tJean de Baddie'),
2980 ('From', 'g\uFFFD\uFFFDst')])
R. David Murray96fd54e2010-10-08 15:55:28 +00002981
2982 def test_get_all_with_8bit_headers(self):
2983 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00002984 self.assertListEqual([str(x) for x in msg.get_all('from')],
R. David Murray96fd54e2010-10-08 15:55:28 +00002985 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00002986 'g\uFFFD\uFFFDst'])
R. David Murray96fd54e2010-10-08 15:55:28 +00002987
2988 non_latin_bin_msg = textwrap.dedent("""\
2989 From: foo@bar.com
2990 To: báz
2991 Subject: Maintenant je vous présente mon collègue, le pouf célèbre
2992 \tJean de Baddie
2993 Mime-Version: 1.0
2994 Content-Type: text/plain; charset="utf-8"
2995 Content-Transfer-Encoding: 8bit
2996
2997 Да, они летят.
2998 """).encode('utf-8')
2999
3000 def test_bytes_generator(self):
3001 msg = email.message_from_bytes(self.non_latin_bin_msg)
3002 out = BytesIO()
3003 email.generator.BytesGenerator(out).flatten(msg)
3004 self.assertEqual(out.getvalue(), self.non_latin_bin_msg)
3005
R. David Murray7372a072011-01-26 21:21:32 +00003006 def test_bytes_generator_handles_None_body(self):
3007 #Issue 11019
3008 msg = email.message.Message()
3009 out = BytesIO()
3010 email.generator.BytesGenerator(out).flatten(msg)
3011 self.assertEqual(out.getvalue(), b"\n")
3012
R. David Murray92532142011-01-07 23:25:30 +00003013 non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\
R. David Murray96fd54e2010-10-08 15:55:28 +00003014 From: foo@bar.com
R. David Murray92532142011-01-07 23:25:30 +00003015 To: =?unknown-8bit?q?b=C3=A1z?=
3016 Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=
3017 =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=
3018 =?unknown-8bit?q?_Jean_de_Baddie?=
R. David Murray96fd54e2010-10-08 15:55:28 +00003019 Mime-Version: 1.0
3020 Content-Type: text/plain; charset="utf-8"
3021 Content-Transfer-Encoding: base64
3022
3023 0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
3024 """)
3025
3026 def test_generator_handles_8bit(self):
3027 msg = email.message_from_bytes(self.non_latin_bin_msg)
3028 out = StringIO()
3029 email.generator.Generator(out).flatten(msg)
R. David Murray92532142011-01-07 23:25:30 +00003030 self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped)
R. David Murray96fd54e2010-10-08 15:55:28 +00003031
3032 def test_bytes_generator_with_unix_from(self):
3033 # The unixfrom contains a current date, so we can't check it
3034 # literally. Just make sure the first word is 'From' and the
3035 # rest of the message matches the input.
3036 msg = email.message_from_bytes(self.non_latin_bin_msg)
3037 out = BytesIO()
3038 email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
3039 lines = out.getvalue().split(b'\n')
3040 self.assertEqual(lines[0].split()[0], b'From')
3041 self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)
3042
R. David Murray92532142011-01-07 23:25:30 +00003043 non_latin_bin_msg_as7bit = non_latin_bin_msg_as7bit_wrapped.split('\n')
3044 non_latin_bin_msg_as7bit[2:4] = [
3045 'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
3046 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
3047 non_latin_bin_msg_as7bit = '\n'.join(non_latin_bin_msg_as7bit)
3048
R. David Murray96fd54e2010-10-08 15:55:28 +00003049 def test_message_from_binary_file(self):
3050 fn = 'test.msg'
3051 self.addCleanup(unlink, fn)
3052 with open(fn, 'wb') as testfile:
3053 testfile.write(self.non_latin_bin_msg)
Brett Cannon384917a2010-10-29 23:08:36 +00003054 with open(fn, 'rb') as testfile:
3055 m = email.parser.BytesParser().parse(testfile)
R. David Murray96fd54e2010-10-08 15:55:28 +00003056 self.assertEqual(str(m), self.non_latin_bin_msg_as7bit)
3057
3058 latin_bin_msg = textwrap.dedent("""\
3059 From: foo@bar.com
3060 To: Dinsdale
3061 Subject: Nudge nudge, wink, wink
3062 Mime-Version: 1.0
3063 Content-Type: text/plain; charset="latin-1"
3064 Content-Transfer-Encoding: 8bit
3065
3066 oh là là, know what I mean, know what I mean?
3067 """).encode('latin-1')
3068
3069 latin_bin_msg_as7bit = textwrap.dedent("""\
3070 From: foo@bar.com
3071 To: Dinsdale
3072 Subject: Nudge nudge, wink, wink
3073 Mime-Version: 1.0
3074 Content-Type: text/plain; charset="iso-8859-1"
3075 Content-Transfer-Encoding: quoted-printable
3076
3077 oh l=E0 l=E0, know what I mean, know what I mean?
3078 """)
3079
3080 def test_string_generator_reencodes_to_quopri_when_appropriate(self):
3081 m = email.message_from_bytes(self.latin_bin_msg)
3082 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3083
3084 def test_decoded_generator_emits_unicode_body(self):
3085 m = email.message_from_bytes(self.latin_bin_msg)
3086 out = StringIO()
3087 email.generator.DecodedGenerator(out).flatten(m)
3088 #DecodedHeader output contains an extra blank line compared
3089 #to the input message. RDM: not sure if this is a bug or not,
3090 #but it is not specific to the 8bit->7bit conversion.
3091 self.assertEqual(out.getvalue(),
3092 self.latin_bin_msg.decode('latin-1')+'\n')
3093
3094 def test_bytes_feedparser(self):
3095 bfp = email.feedparser.BytesFeedParser()
3096 for i in range(0, len(self.latin_bin_msg), 10):
3097 bfp.feed(self.latin_bin_msg[i:i+10])
3098 m = bfp.close()
3099 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3100
R. David Murray8451c4b2010-10-23 22:19:56 +00003101 def test_crlf_flatten(self):
3102 with openfile('msg_26.txt', 'rb') as fp:
3103 text = fp.read()
3104 msg = email.message_from_bytes(text)
3105 s = BytesIO()
3106 g = email.generator.BytesGenerator(s)
3107 g.flatten(msg, linesep='\r\n')
3108 self.assertEqual(s.getvalue(), text)
3109 maxDiff = None
3110
Ezio Melottib3aedd42010-11-20 19:04:17 +00003111
class BaseTestBytesGeneratorIdempotent:
    """Bytes-based helpers for idempotency tests.

    Subclasses must supply ``linesep`` (str), ``blinesep`` (bytes) and
    ``normalize_linesep_regex`` (a compiled bytes pattern), which this class
    reads but does not define.
    """

    # Show complete diffs when a comparison fails.
    maxDiff = None

    def _msgobj(self, filename):
        """Return (message, data) for the named test file, read as bytes.

        Line endings matched by ``normalize_linesep_regex`` are replaced with
        ``blinesep`` before the data is parsed.
        """
        with openfile(filename, 'rb') as fp:
            raw = fp.read()
        normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(normalized), normalized

    def _idempotent(self, msg, data, unixfrom=False):
        """Flatten msg with BytesGenerator and compare the result to data."""
        buffer = BytesIO()
        generator = email.generator.BytesGenerator(buffer, maxheaderlen=0)
        generator.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertByteStringsEqual(data, buffer.getvalue())

    def assertByteStringsEqual(self, str1, str2):
        """Compare two byte strings as lists of lines split on LF."""
        # Not using self.blinesep here is intentional.  This way the output
        # is more useful when the failure results in mixed line endings.
        first_lines = str1.split(b'\n')
        second_lines = str2.split(b'\n')
        self.assertListEqual(first_lines, second_lines)
3133
3134
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                     TestIdempotent):
    """Runs the idempotency tests on bytes with LF line endings."""

    # Matches CRLF so that it can be replaced by blinesep.
    normalize_linesep_regex = re.compile(br'\r\n')
    blinesep = b'\n'
    linesep = '\n'
3140
3141
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    """Runs the idempotency tests on bytes with CRLF line endings."""

    # Matches an LF that is not already preceded by CR, so that it can be
    # replaced by blinesep.
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
    blinesep = b'\r\n'
    linesep = '\r\n'
3147
Ezio Melottib3aedd42010-11-20 19:04:17 +00003148
class TestBase64(unittest.TestCase):
    # Tests for the helpers in email.base64mime.

    def test_len(self):
        # header_length() must agree with the size of the encoded text.
        encoded = base64mime.body_encode(b'hello', eol='')
        self.assertEqual(base64mime.header_length('hello'), len(encoded))
        for size in range(15):
            # Four output characters for every (possibly partial) group of
            # three input characters.
            expected = 4 * ((size + 2) // 3)
            self.assertEqual(base64mime.header_length('x' * size), expected)

    def test_decode(self):
        self.assertEqual(base64mime.decode(''), b'')
        self.assertEqual(base64mime.decode('aGVsbG8='), b'hello')

    def test_encode(self):
        encode = base64mime.body_encode
        check = self.assertEqual
        check(encode(b''), b'')
        check(encode(b'hello'), 'aGVsbG8=\n')
        # Input that itself ends with a newline
        check(encode(b'hello\n'), 'aGVsbG8K\n')
        full_line = 'eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg'
        last_line = 'eHh4eCB4eHh4IA=='
        wrapped = (full_line, full_line, full_line, last_line)
        # The maxlinelen argument limits the width of each output line
        check(encode(b'xxxx ' * 20, maxlinelen=40),
              ''.join(line + '\n' for line in wrapped))
        # The eol argument replaces the line terminator
        check(encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'),
              ''.join(line + '\r\n' for line in wrapped))

    def test_header_encode(self):
        encode = base64mime.header_encode
        check = self.assertEqual
        check(encode('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        check(encode('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        check(encode('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # An explicit charset shows up in the encoded word
        check(encode('hello', charset='iso-8859-2'),
              '=?iso-8859-2?b?aGVsbG8=?=')
        check(encode('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003199
3200
Ezio Melottib3aedd42010-11-20 19:04:17 +00003201
class TestQuopri(unittest.TestCase):
    # Tests for the helpers in email.quoprimime.

    def setUp(self):
        # Set of characters (as byte integers) that don't need to be encoded
        # in headers.
        self.hlit = list(chain(
            range(ord('a'), ord('z') + 1),
            range(ord('A'), ord('Z') + 1),
            range(ord('0'), ord('9') + 1),
            b'!*+-/'))
        # Set of characters (as byte integers) that do need to be encoded in
        # headers.
        self.hnon = [c for c in range(256) if c not in self.hlit]
        # Checked with a real assertion method: a bare assert statement is
        # removed when Python runs with -O and reports nothing useful.
        self.assertEqual(len(self.hlit) + len(self.hnon), 256)
        # Set of characters (as byte integers) that don't need to be encoded
        # in bodies.
        self.blit = list(range(ord(' '), ord('~') + 1))
        self.blit.append(ord('\t'))
        self.blit.remove(ord('='))
        # Set of characters (as byte integers) that do need to be encoded in
        # bodies.
        self.bnon = [c for c in range(256) if c not in self.blit]
        self.assertEqual(len(self.blit) + len(self.bnon), 256)

    def test_quopri_header_check(self):
        for c in self.hlit:
            self.assertFalse(quoprimime.header_check(c),
                        'Should not be header quopri encoded: %s' % chr(c))
        for c in self.hnon:
            self.assertTrue(quoprimime.header_check(c),
                            'Should be header quopri encoded: %s' % chr(c))

    def test_quopri_body_check(self):
        for c in self.blit:
            self.assertFalse(quoprimime.body_check(c),
                        'Should not be body quopri encoded: %s' % chr(c))
        for c in self.bnon:
            self.assertTrue(quoprimime.body_check(c),
                            'Should be body quopri encoded: %s' % chr(c))

    def test_header_quopri_len(self):
        eq = self.assertEqual
        eq(quoprimime.header_length(b'hello'), 5)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
           quoprimime.header_length(b'hello') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
           quoprimime.header_length(b'h@e@l@l@o@') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        for c in self.hlit:
            eq(quoprimime.header_length(bytes([c])), 1,
               'expected length 1 for %r' % chr(c))
        for c in self.hnon:
            # Space is special; it's encoded to _
            if c == ord(' '):
                continue
            eq(quoprimime.header_length(bytes([c])), 3,
               'expected length 3 for %r' % chr(c))
        eq(quoprimime.header_length(b' '), 1)

    def test_body_quopri_len(self):
        eq = self.assertEqual
        for c in self.blit:
            eq(quoprimime.body_length(bytes([c])), 1)
        for c in self.bnon:
            eq(quoprimime.body_length(bytes([c])), 3)

    def test_quote_unquote_idempotent(self):
        for x in range(256):
            c = chr(x)
            self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)

    def test_header_encode(self):
        eq = self.assertEqual
        he = quoprimime.header_encode
        eq(he(b'hello'), '=?iso-8859-1?q?hello?=')
        eq(he(b'hello', charset='iso-8859-2'), '=?iso-8859-2?q?hello?=')
        eq(he(b'hello\nworld'), '=?iso-8859-1?q?hello=0Aworld?=')
        # Test a non-ASCII character
        eq(he(b'hello\xc7there'), '=?iso-8859-1?q?hello=C7there?=')

    def test_decode(self):
        eq = self.assertEqual
        eq(quoprimime.decode(''), '')
        eq(quoprimime.decode('hello'), 'hello')
        eq(quoprimime.decode('hello', 'X'), 'hello')
        eq(quoprimime.decode('hello\nworld', 'X'), 'helloXworld')

    def test_encode(self):
        eq = self.assertEqual
        eq(quoprimime.body_encode(''), '')
        eq(quoprimime.body_encode('hello'), 'hello')
        # Test input containing a CRLF line ending
        eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
        # Test the maxlinelen arg
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
x xxxx xxxx xxxx xxxx=20""")
        # Test the eol argument
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
x xxxx xxxx xxxx xxxx=20""")
        eq(quoprimime.body_encode("""\
one line

two line"""), """\
one line

two line""")
3318
3319
Ezio Melottib3aedd42010-11-20 19:04:17 +00003320
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003321# Test the Charset class
3322class TestCharset(unittest.TestCase):
3323 def tearDown(self):
3324 from email import charset as CharsetModule
3325 try:
3326 del CharsetModule.CHARSETS['fake']
3327 except KeyError:
3328 pass
3329
Guido van Rossum9604e662007-08-30 03:46:43 +00003330 def test_codec_encodeable(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003331 eq = self.assertEqual
3332 # Make sure us-ascii = no Unicode conversion
3333 c = Charset('us-ascii')
Guido van Rossum9604e662007-08-30 03:46:43 +00003334 eq(c.header_encode('Hello World!'), 'Hello World!')
3335 # Test 8-bit idempotency with us-ascii
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003336 s = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
Guido van Rossum9604e662007-08-30 03:46:43 +00003337 self.assertRaises(UnicodeError, c.header_encode, s)
3338 c = Charset('utf-8')
3339 eq(c.header_encode(s), '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003340
3341 def test_body_encode(self):
3342 eq = self.assertEqual
3343 # Try a charset with QP body encoding
3344 c = Charset('iso-8859-1')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00003345 eq('hello w=F6rld', c.body_encode('hello w\xf6rld'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003346 # Try a charset with Base64 body encoding
3347 c = Charset('utf-8')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00003348 eq('aGVsbG8gd29ybGQ=\n', c.body_encode(b'hello world'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003349 # Try a charset with None body encoding
3350 c = Charset('us-ascii')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00003351 eq('hello world', c.body_encode('hello world'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003352 # Try the convert argument, where input codec != output codec
3353 c = Charset('euc-jp')
3354 # With apologies to Tokio Kikuchi ;)
Barry Warsawbef9d212007-08-31 10:55:37 +00003355 # XXX FIXME
3356## try:
3357## eq('\x1b$B5FCO;~IW\x1b(B',
3358## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
3359## eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
3360## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
3361## except LookupError:
3362## # We probably don't have the Japanese codecs installed
3363## pass
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003364 # Testing SF bug #625509, which we have to fake, since there are no
3365 # built-in encodings where the header encoding is QP but the body
3366 # encoding is not.
3367 from email import charset as CharsetModule
3368 CharsetModule.add_charset('fake', CharsetModule.QP, None)
3369 c = Charset('fake')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00003370 eq('hello w\xf6rld', c.body_encode('hello w\xf6rld'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003371
3372 def test_unicode_charset_name(self):
3373 charset = Charset('us-ascii')
3374 self.assertEqual(str(charset), 'us-ascii')
3375 self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
3376
3377
Ezio Melottib3aedd42010-11-20 19:04:17 +00003378
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003379# Test multilingual MIME headers.
class TestHeader(TestEmailBase):
    # Tests for email.header.Header, decode_header() and make_header().

    def test_simple(self):
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        # NOTE(review): the appended chunk starts with a space; confirm the
        # exact spacing of the expected value against Header.encode().
        h.append(' Goodbye World!')
        eq(h.encode(), 'Hello World! Goodbye World!')

    def test_simple_surprise(self):
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        # No separating space is supplied by the caller here, yet the chunks
        # are expected to come out separated by one.
        h.append('Goodbye World!')
        eq(h.encode(), 'Hello World! Goodbye World!')

    def test_header_needs_no_decoding(self):
        h = 'no decoding needed'
        self.assertEqual(decode_header(h), [(h, None)])

    def test_long(self):
        h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
                   maxlinelen=76)
        for line in h.encode(splitchars=' ').split('\n '):
            # assertLessEqual reports the offending length on failure.
            self.assertLessEqual(len(line), 76)

    def test_multilingual(self):
        eq = self.ndiffAssertEqual
        g = Charset("iso-8859-1")
        cz = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        g_head = (b'Die Mieter treten hier ein werden mit einem '
                  b'Foerderband komfortabel den Korridor entlang, '
                  b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
                  b'gegen die rotierenden Klingen bef\xf6rdert. ')
        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        h = Header(g_head, g)
        h.append(cz_head, cz)
        h.append(utf8_head, utf8)
        enc = h.encode(maxlinelen=76)
        eq(enc, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
        # Decoding must yield one (bytes, charset) pair per original chunk.
        decoded = decode_header(enc)
        eq(len(decoded), 3)
        eq(decoded[0], (g_head, 'iso-8859-1'))
        eq(decoded[1], (cz_head, 'iso-8859-2'))
        eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
        ustr = str(h)
        eq(ustr,
           (b'Die Mieter treten hier ein werden mit einem Foerderband '
            b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
            b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
            b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
            b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
            b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
            b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
            b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
            b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
            b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
            b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
            b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
            b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
            b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
            b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
            b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
            ).decode('utf-8'))
        # Test make_header()
        newh = make_header(decode_header(enc))
        eq(newh, h)

    def test_empty_header_encode(self):
        h = Header()
        self.assertEqual(h.encode(), '')

    def test_header_ctor_default_args(self):
        eq = self.ndiffAssertEqual
        h = Header()
        eq(h, '')
        h.append('foo', Charset('iso-8859-1'))
        eq(h, 'foo')

    def test_explicit_maxlinelen(self):
        eq = self.ndiffAssertEqual
        hstr = ('A very long line that must get split to something other '
                'than at the 76th character boundary to test the non-default '
                'behavior')
        h = Header(hstr)
        eq(h.encode(), '''\
A very long line that must get split to something other than at the 76th
 character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        # Passing header_name moves the expected split point one word earlier.
        h = Header(hstr, header_name='Subject')
        eq(h.encode(), '''\
A very long line that must get split to something other than at the
 76th character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        # With a large enough maxlinelen no folding is expected at all.
        h = Header(hstr, maxlinelen=1024, header_name='Subject')
        eq(h.encode(), hstr)
        eq(str(h), hstr)

    def test_quopri_splittable(self):
        eq = self.ndiffAssertEqual
        h = Header(charset='iso-8859-1', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_?=""")
        # The folded form must decode back to the original text.
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='iso-8859-1', maxlinelen=40)
        h.append('xxxx ' * 20)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
 =?iso-8859-1?q?_xxxx_xxxx_?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_base64_splittable(self):
        eq = self.ndiffAssertEqual
        h = Header(charset='koi8-r', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IA==?=""")
        # The folded form must decode back to the original text.
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='koi8-r', maxlinelen=40)
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_us_ascii_header(self):
        eq = self.assertEqual
        s = 'hello'
        x = decode_header(s)
        eq(x, [('hello', None)])
        h = make_header(x)
        eq(s, h.encode())

    def test_string_charset(self):
        eq = self.assertEqual
        h = Header()
        # The charset is given by name rather than as a Charset instance.
        h.append('hello', 'iso-8859-1')
        eq(h, 'hello')

##    def test_unicode_error(self):
##        raises = self.assertRaises
##        raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
##        h = Header()
##        raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')

    def test_utf8_shortest(self):
        eq = self.assertEqual
        # The first value is expected in q encoding, the second in b encoding.
        h = Header('p\xf6stal', 'utf-8')
        eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
        h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
        eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')

    def test_bad_8bit_header(self):
        raises = self.assertRaises
        eq = self.assertEqual
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        raises(UnicodeError, Header, x)
        h = Header()
        raises(UnicodeError, h.append, x)
        # With errors='replace' the undecodable byte is replaced, not fatal.
        e = x.decode('utf-8', 'replace')
        eq(str(Header(x, errors='replace')), e)
        h.append(x, errors='replace')
        eq(str(h), e)

    def test_encoded_adjacent_nonencoded(self):
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        h.append('world')
        s = h.encode()
        eq(s, '=?iso-8859-1?q?hello?= world')
        h = make_header(decode_header(s))
        eq(h.encode(), s)

    def test_whitespace_eater(self):
        eq = self.assertEqual
        s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
        parts = decode_header(s)
        # The two adjacent koi8-r encoded words are expected to come back as
        # a single part.
        eq(parts, [(b'Subject:', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b'zz.', None)])
        hdr = make_header(parts)
        eq(hdr.encode(),
           'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')

    def test_broken_base64_header(self):
        raises = self.assertRaises
        s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
        raises(errors.HeaderParseError, decode_header, s)

    def test_shift_jis_charset(self):
        # A shift_jis header is expected to be emitted as iso-2022-jp.
        h = Header('文', charset='shift_jis')
        self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')
3690
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003691
Ezio Melottib3aedd42010-11-20 19:04:17 +00003692
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003693# Test RFC 2231 header parameters (en/de)coding
3694class TestRFC2231(TestEmailBase):
3695 def test_get_param(self):
3696 eq = self.assertEqual
3697 msg = self._msgobj('msg_29.txt')
3698 eq(msg.get_param('title'),
3699 ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
3700 eq(msg.get_param('title', unquote=False),
3701 ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))
3702
3703 def test_set_param(self):
3704 eq = self.ndiffAssertEqual
3705 msg = Message()
3706 msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
3707 charset='us-ascii')
3708 eq(msg.get_param('title'),
3709 ('us-ascii', '', 'This is even more ***fun*** isn\'t it!'))
3710 msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
3711 charset='us-ascii', language='en')
3712 eq(msg.get_param('title'),
3713 ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
3714 msg = self._msgobj('msg_01.txt')
3715 msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
3716 charset='us-ascii', language='en')
3717 eq(msg.as_string(maxheaderlen=78), """\
3718Return-Path: <bbb@zzz.org>
3719Delivered-To: bbb@zzz.org
3720Received: by mail.zzz.org (Postfix, from userid 889)
3721\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
3722MIME-Version: 1.0
3723Content-Transfer-Encoding: 7bit
3724Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
3725From: bbb@ddd.com (John X. Doe)
3726To: bbb@zzz.org
3727Subject: This is a test message
3728Date: Fri, 4 May 2001 14:05:44 -0400
3729Content-Type: text/plain; charset=us-ascii;
R. David Murraydfd7eb02010-12-24 22:36:49 +00003730 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003731
3732
3733Hi,
3734
3735Do you like this message?
3736
3737-Me
3738""")
3739
3740 def test_del_param(self):
3741 eq = self.ndiffAssertEqual
3742 msg = self._msgobj('msg_01.txt')
3743 msg.set_param('foo', 'bar', charset='us-ascii', language='en')
3744 msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
3745 charset='us-ascii', language='en')
3746 msg.del_param('foo', header='Content-Type')
3747 eq(msg.as_string(maxheaderlen=78), """\
3748Return-Path: <bbb@zzz.org>
3749Delivered-To: bbb@zzz.org
3750Received: by mail.zzz.org (Postfix, from userid 889)
3751\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
3752MIME-Version: 1.0
3753Content-Transfer-Encoding: 7bit
3754Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
3755From: bbb@ddd.com (John X. Doe)
3756To: bbb@zzz.org
3757Subject: This is a test message
3758Date: Fri, 4 May 2001 14:05:44 -0400
3759Content-Type: text/plain; charset="us-ascii";
R. David Murraydfd7eb02010-12-24 22:36:49 +00003760 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003761
3762
3763Hi,
3764
3765Do you like this message?
3766
3767-Me
3768""")
3769
3770 def test_rfc2231_get_content_charset(self):
3771 eq = self.assertEqual
3772 msg = self._msgobj('msg_32.txt')
3773 eq(msg.get_content_charset(), 'us-ascii')
3774
R. David Murraydfd7eb02010-12-24 22:36:49 +00003775 def test_rfc2231_parse_rfc_quoting(self):
3776 m = textwrap.dedent('''\
3777 Content-Disposition: inline;
3778 \tfilename*0*=''This%20is%20even%20more%20;
3779 \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
3780 \tfilename*2="is it not.pdf"
3781
3782 ''')
3783 msg = email.message_from_string(m)
3784 self.assertEqual(msg.get_filename(),
3785 'This is even more ***fun*** is it not.pdf')
3786 self.assertEqual(m, msg.as_string())
3787
3788 def test_rfc2231_parse_extra_quoting(self):
3789 m = textwrap.dedent('''\
3790 Content-Disposition: inline;
3791 \tfilename*0*="''This%20is%20even%20more%20";
3792 \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
3793 \tfilename*2="is it not.pdf"
3794
3795 ''')
3796 msg = email.message_from_string(m)
3797 self.assertEqual(msg.get_filename(),
3798 'This is even more ***fun*** is it not.pdf')
3799 self.assertEqual(m, msg.as_string())
3800
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003801 def test_rfc2231_no_language_or_charset(self):
3802 m = '''\
3803Content-Transfer-Encoding: 8bit
3804Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
3805Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm
3806
3807'''
3808 msg = email.message_from_string(m)
3809 param = msg.get_param('NAME')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003810 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003811 self.assertEqual(
3812 param,
3813 'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')
3814
3815 def test_rfc2231_no_language_or_charset_in_filename(self):
3816 m = '''\
3817Content-Disposition: inline;
3818\tfilename*0*="''This%20is%20even%20more%20";
3819\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
3820\tfilename*2="is it not.pdf"
3821
3822'''
3823 msg = email.message_from_string(m)
3824 self.assertEqual(msg.get_filename(),
3825 'This is even more ***fun*** is it not.pdf')
3826
3827 def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
3828 m = '''\
3829Content-Disposition: inline;
3830\tfilename*0*="''This%20is%20even%20more%20";
3831\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
3832\tfilename*2="is it not.pdf"
3833
3834'''
3835 msg = email.message_from_string(m)
3836 self.assertEqual(msg.get_filename(),
3837 'This is even more ***fun*** is it not.pdf')
3838
3839 def test_rfc2231_partly_encoded(self):
3840 m = '''\
3841Content-Disposition: inline;
3842\tfilename*0="''This%20is%20even%20more%20";
3843\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
3844\tfilename*2="is it not.pdf"
3845
3846'''
3847 msg = email.message_from_string(m)
3848 self.assertEqual(
3849 msg.get_filename(),
3850 'This%20is%20even%20more%20***fun*** is it not.pdf')
3851
3852 def test_rfc2231_partly_nonencoded(self):
3853 m = '''\
3854Content-Disposition: inline;
3855\tfilename*0="This%20is%20even%20more%20";
3856\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
3857\tfilename*2="is it not.pdf"
3858
3859'''
3860 msg = email.message_from_string(m)
3861 self.assertEqual(
3862 msg.get_filename(),
3863 'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')
3864
3865 def test_rfc2231_no_language_or_charset_in_boundary(self):
3866 m = '''\
3867Content-Type: multipart/alternative;
3868\tboundary*0*="''This%20is%20even%20more%20";
3869\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
3870\tboundary*2="is it not.pdf"
3871
3872'''
3873 msg = email.message_from_string(m)
3874 self.assertEqual(msg.get_boundary(),
3875 'This is even more ***fun*** is it not.pdf')
3876
3877 def test_rfc2231_no_language_or_charset_in_charset(self):
3878 # This is a nonsensical charset value, but tests the code anyway
3879 m = '''\
3880Content-Type: text/plain;
3881\tcharset*0*="This%20is%20even%20more%20";
3882\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
3883\tcharset*2="is it not.pdf"
3884
3885'''
3886 msg = email.message_from_string(m)
3887 self.assertEqual(msg.get_content_charset(),
3888 'this is even more ***fun*** is it not.pdf')
3889
3890 def test_rfc2231_bad_encoding_in_filename(self):
3891 m = '''\
3892Content-Disposition: inline;
3893\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
3894\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
3895\tfilename*2="is it not.pdf"
3896
3897'''
3898 msg = email.message_from_string(m)
3899 self.assertEqual(msg.get_filename(),
3900 'This is even more ***fun*** is it not.pdf')
3901
3902 def test_rfc2231_bad_encoding_in_charset(self):
3903 m = """\
3904Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D
3905
3906"""
3907 msg = email.message_from_string(m)
3908 # This should return None because non-ascii characters in the charset
3909 # are not allowed.
3910 self.assertEqual(msg.get_content_charset(), None)
3911
3912 def test_rfc2231_bad_character_in_charset(self):
3913 m = """\
3914Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D
3915
3916"""
3917 msg = email.message_from_string(m)
3918 # This should return None because non-ascii characters in the charset
3919 # are not allowed.
3920 self.assertEqual(msg.get_content_charset(), None)
3921
3922 def test_rfc2231_bad_character_in_filename(self):
3923 m = '''\
3924Content-Disposition: inline;
3925\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
3926\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
3927\tfilename*2*="is it not.pdf%E2"
3928
3929'''
3930 msg = email.message_from_string(m)
3931 self.assertEqual(msg.get_filename(),
3932 'This is even more ***fun*** is it not.pdf\ufffd')
3933
3934 def test_rfc2231_unknown_encoding(self):
3935 m = """\
3936Content-Transfer-Encoding: 8bit
3937Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt
3938
3939"""
3940 msg = email.message_from_string(m)
3941 self.assertEqual(msg.get_filename(), 'myfile.txt')
3942
3943 def test_rfc2231_single_tick_in_filename_extended(self):
3944 eq = self.assertEqual
3945 m = """\
3946Content-Type: application/x-foo;
3947\tname*0*=\"Frank's\"; name*1*=\" Document\"
3948
3949"""
3950 msg = email.message_from_string(m)
3951 charset, language, s = msg.get_param('name')
3952 eq(charset, None)
3953 eq(language, None)
3954 eq(s, "Frank's Document")
3955
3956 def test_rfc2231_single_tick_in_filename(self):
3957 m = """\
3958Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"
3959
3960"""
3961 msg = email.message_from_string(m)
3962 param = msg.get_param('name')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003963 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003964 self.assertEqual(param, "Frank's Document")
3965
3966 def test_rfc2231_tick_attack_extended(self):
3967 eq = self.assertEqual
3968 m = """\
3969Content-Type: application/x-foo;
3970\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"
3971
3972"""
3973 msg = email.message_from_string(m)
3974 charset, language, s = msg.get_param('name')
3975 eq(charset, 'us-ascii')
3976 eq(language, 'en-us')
3977 eq(s, "Frank's Document")
3978
3979 def test_rfc2231_tick_attack(self):
3980 m = """\
3981Content-Type: application/x-foo;
3982\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"
3983
3984"""
3985 msg = email.message_from_string(m)
3986 param = msg.get_param('name')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003987 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003988 self.assertEqual(param, "us-ascii'en-us'Frank's Document")
3989
3990 def test_rfc2231_no_extended_values(self):
3991 eq = self.assertEqual
3992 m = """\
3993Content-Type: application/x-foo; name=\"Frank's Document\"
3994
3995"""
3996 msg = email.message_from_string(m)
3997 eq(msg.get_param('name'), "Frank's Document")
3998
3999 def test_rfc2231_encoded_then_unencoded_segments(self):
4000 eq = self.assertEqual
4001 m = """\
4002Content-Type: application/x-foo;
4003\tname*0*=\"us-ascii'en-us'My\";
4004\tname*1=\" Document\";
4005\tname*2*=\" For You\"
4006
4007"""
4008 msg = email.message_from_string(m)
4009 charset, language, s = msg.get_param('name')
4010 eq(charset, 'us-ascii')
4011 eq(language, 'en-us')
4012 eq(s, 'My Document For You')
4013
4014 def test_rfc2231_unencoded_then_encoded_segments(self):
4015 eq = self.assertEqual
4016 m = """\
4017Content-Type: application/x-foo;
4018\tname*0=\"us-ascii'en-us'My\";
4019\tname*1*=\" Document\";
4020\tname*2*=\" For You\"
4021
4022"""
4023 msg = email.message_from_string(m)
4024 charset, language, s = msg.get_param('name')
4025 eq(charset, 'us-ascii')
4026 eq(language, 'en-us')
4027 eq(s, 'My Document For You')
4028
4029
Ezio Melottib3aedd42010-11-20 19:04:17 +00004030
# Tests to ensure that signed parts of an email are completely preserved, as
# required by RFC1847 section 2.1.  Note that these are incomplete, because the
# email package does not currently always preserve the body.  See issue 1670765.
class TestSigned(TestEmailBase):

    # Captures (as group 2) the text between the first '--boundary' line and
    # the next line that consists of exactly the same '--boundary'.  Compiled
    # once for the class instead of on every comparison; relies on the
    # module-level "import re".
    _first_part_re = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)

    def _msg_and_obj(self, filename):
        # Return the raw text of the named data file together with the
        # Message object parsed from that text.
        with openfile(findfile(filename)) as fp:
            original = fp.read()
        msg = email.message_from_string(original)
        return original, msg

    def _first_part(self, text):
        # Return the first mime part found in text.  If no boundary pair is
        # found, fail the test with a readable message rather than letting
        # None.group() raise AttributeError.
        match = self._first_part_re.search(text)
        self.assertIsNotNone(
            match, 'no MIME part delimited by a boundary was found')
        return match.group(2)

    def _signed_parts_eq(self, original, result):
        # Extract the first mime part of each message and require the
        # regenerated one to be identical to the one read from disk.
        inpart = self._first_part(original)
        outpart = self._first_part(result)
        self.assertEqual(outpart, inpart)

    def test_long_headers_as_string(self):
        # Round trip through Message.as_string() with default settings.
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        # Same round trip, but asking as_string() for maxheaderlen=60.
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        # Same round trip, driving a Generator directly.
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)
4066
4067
Ezio Melottib3aedd42010-11-20 19:04:17 +00004068
def _testclasses():
    """Return the test case classes defined in (or imported into) this module.

    A class qualifies when its module-level name starts with 'Test' and it
    is a subclass of unittest.TestCase.  Other objects that merely happen to
    be bound to a 'Test...' name are skipped, since they cannot be turned
    into a test suite.  The result follows the sorted order of dir().
    """
    mod = sys.modules[__name__]
    candidates = (getattr(mod, name)
                  for name in dir(mod) if name.startswith('Test'))
    return [obj for obj in candidates
            if isinstance(obj, type) and issubclass(obj, unittest.TestCase)]
4072
4073
def suite():
    """Return a TestSuite containing the tests of every class from _testclasses().

    Uses the default TestLoader rather than unittest.makeSuite(), which is
    deprecated and no longer available in recent Python versions.
    """
    all_tests = unittest.TestSuite()
    load = unittest.defaultTestLoader.loadTestsFromTestCase
    for testclass in _testclasses():
        all_tests.addTest(load(testclass))
    return all_tests
4079
4080
def test_main():
    """Run every test class of this module through test.support.run_unittest.

    All classes are handed over in a single call.  run_unittest signals a
    failing run by raising, so calling it once per class would stop at the
    first class with a failure and never run the remaining ones.
    """
    run_unittest(*_testclasses())
4084
4085
Ezio Melottib3aedd42010-11-20 19:04:17 +00004086
# When executed as a script, let unittest run whatever suite() collects.
if __name__ == "__main__":
    unittest.main(defaultTest="suite")