blob: 89effcb71df14f5b1a1093a170540e6492965d55 [file] [log] [blame]
# Copyright (C) 2001-2010 Python Software Foundation
# Contact: email-sig@python.org
# email package unit tests
4
5import os
6import sys
7import time
8import base64
9import difflib
10import unittest
11import warnings
R. David Murray96fd54e2010-10-08 15:55:28 +000012import textwrap
Guido van Rossum8b3febe2007-08-30 01:15:14 +000013
R. David Murray96fd54e2010-10-08 15:55:28 +000014from io import StringIO, BytesIO
Guido van Rossum8b3febe2007-08-30 01:15:14 +000015from itertools import chain
16
17import email
18
19from email.charset import Charset
20from email.header import Header, decode_header, make_header
21from email.parser import Parser, HeaderParser
22from email.generator import Generator, DecodedGenerator
23from email.message import Message
24from email.mime.application import MIMEApplication
25from email.mime.audio import MIMEAudio
26from email.mime.text import MIMEText
27from email.mime.image import MIMEImage
28from email.mime.base import MIMEBase
29from email.mime.message import MIMEMessage
30from email.mime.multipart import MIMEMultipart
31from email import utils
32from email import errors
33from email import encoders
34from email import iterators
35from email import base64mime
36from email import quoprimime
37
R. David Murray96fd54e2010-10-08 15:55:28 +000038from test.support import findfile, run_unittest, unlink
Guido van Rossum8b3febe2007-08-30 01:15:14 +000039from email.test import __file__ as landmark
40
41
# String constants shared by the tests in this file; NL and SPACE are used
# as separators for str.join().
NL = '\n'
EMPTYSTRING = ''
SPACE = ' '
45
46
47
def openfile(filename, *args, **kws):
    """Open *filename* from the email test-data directory.

    The name is resolved against the ``data`` directory located beside the
    file named by ``landmark``.  Any extra positional or keyword arguments
    are handed unchanged to the built-in ``open()``.
    """
    data_dir = os.path.join(os.path.dirname(landmark), 'data')
    return open(os.path.join(data_dir, filename), *args, **kws)
51
52
53
# Base test class
class TestEmailBase(unittest.TestCase):
    """Base class providing helpers shared by the email test cases."""

    def ndiffAssertEqual(self, first, second):
        """Like assertEqual except use ndiff for readable output."""
        if first != second:
            # Compare the repr() of every line so that differences in
            # whitespace and escapes are visible in the failure message.
            rfirst = [repr(line) for line in str(first).splitlines()]
            rsecond = [repr(line) for line in str(second).splitlines()]
            diff = difflib.ndiff(rfirst, rsecond)
            raise self.failureException(NL + NL.join(diff))

    def _msgobj(self, filename):
        """Parse the named test-data file and return the message object.

        openfile() already resolves *filename* against the test-data
        directory, exactly as the direct openfile('msg_NN.txt') calls
        elsewhere in this file rely on, so the name is passed through as-is
        rather than first being searched for on sys.path.
        """
        with openfile(filename) as fp:
            return email.message_from_file(fp)
69
70
71
# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    """Tests for the header, parameter and payload API of Message."""

    def test_get_all(self):
        # get_all() returns every value of a repeated header, or the
        # supplied default when the header is missing.
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        self.assertIsNone(msg.get_charset())
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        self.assertIsNone(msg.get_charset())
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        # set_charset() also accepts a plain charset name.
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        # A charset passed to set_payload() is recorded on the message.
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_get_charsets(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        # An explicit fallback replaces None for parts without a charset.
        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        with openfile('msg_46.txt') as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, mangle_from_=True, maxheaderlen=0)
        gen.flatten(msg, unixfrom=False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        self.assertIsNone(msg.get_payload(decode=True))
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            # Assigning a header appends instead of replacing, and lookups
            # return the first match.  Remove the previous value first so
            # that every spelling is really exercised, not just the first.
            del msg['content-transfer-encoding']
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_decoded_generator(self):
        # Flattening msg_07 with DecodedGenerator must give msg_17's text.
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        self.assertIn('from', msg)
        self.assertIn('From', msg)
        self.assertIn('FROM', msg)
        self.assertIn('to', msg)
        self.assertIn('To', msg)
        self.assertIn('TO', msg)

    def test_as_string(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        eq(text, str(msg))
        # With unixfrom=True a "From " envelope line is put in front of
        # the otherwise identical text.
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        self.assertTrue(lines[0].startswith('From '))
        eq(text, NL.join(lines[1:]))

    def test_bad_param(self):
        # A parameter without "=value" reads back as the empty string.
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_filename())

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_boundary())

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        # There is no Content-Type: header, so the default lookup fails.
        self.assertIsNone(msg.get_params())
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    def test_get_param_liberal(self):
        # Whitespace around "=" and a duplicated field name in the value
        # must not prevent the boundary from being found.
        msg = Message()
        msg['Content-Type'] = ('Content-Type: Multipart/mixed; '
                               'boundary = "CPIMSSMTPC06p5f3tG"')
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        self.assertIsNone(msg.get_param('quuz', header='x-header'))
        self.assertIsNone(msg.get_param('quuz'))
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    def test_get_param_with_semis_in_quotes(self):
        # Semicolons inside a quoted value must not split the parameter.
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    def test_get_param_with_quotes(self):
        # Escaped quotes inside continued (bar*0, bar*1) parameter values.
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        # Header membership tests ignore the case of the field name.
        msg = email.message_from_string('Header: exists')
        self.assertIn('header', msg)
        self.assertIn('Header', msg)
        self.assertIn('HEADER', msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        # Setting the parameter again appends it after the boundary.
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        # A type without a "/" is rejected.
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # Changing the type keeps the existing parameters.
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        # A Content-Type without a "/" falls back to text/plain.
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        # A Content-Type without a "/" falls back to text/plain.
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        # With a duplicated header only the first occurrence is replaced.
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    def test_broken_base64_payload(self):
        # Undecodable base64 is handed back as the raw payload bytes.
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000511
512
513
# Test the email.encoders module
515class TestEncoders(unittest.TestCase):
516 def test_encode_empty_payload(self):
517 eq = self.assertEqual
518 msg = Message()
519 msg.set_charset('us-ascii')
520 eq(msg['content-transfer-encoding'], '7bit')
521
522 def test_default_cte(self):
523 eq = self.assertEqual
Ezio Melottic303c122010-04-22 11:57:12 +0000524 # 7bit data and the default us-ascii _charset
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000525 msg = MIMEText('hello world')
526 eq(msg['content-transfer-encoding'], '7bit')
Ezio Melottic303c122010-04-22 11:57:12 +0000527 # Similar, but with 8bit data
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000528 msg = MIMEText('hello \xf8 world')
529 eq(msg['content-transfer-encoding'], '8bit')
530 # And now with a different charset
531 msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
532 eq(msg['content-transfer-encoding'], 'quoted-printable')
533
R. David Murraye85200d2010-05-06 01:41:14 +0000534 def test_encode7or8bit(self):
535 # Make sure a charset whose input character set is 8bit but
536 # whose output character set is 7bit gets a transfer-encoding
537 # of 7bit.
538 eq = self.assertEqual
R. David Murray850fc852010-06-03 01:58:28 +0000539 msg = MIMEText('文', _charset='euc-jp')
R. David Murraye85200d2010-05-06 01:41:14 +0000540 eq(msg['content-transfer-encoding'], '7bit')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000541
542
# Test long header wrapping
544class TestLongHeaders(TestEmailBase):
545 def test_split_long_continuation(self):
546 eq = self.ndiffAssertEqual
547 msg = email.message_from_string("""\
548Subject: bug demonstration
549\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
550\tmore text
551
552test
553""")
554 sfp = StringIO()
555 g = Generator(sfp)
556 g.flatten(msg)
557 eq(sfp.getvalue(), """\
558Subject: bug demonstration
559\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
560\tmore text
561
562test
563""")
564
565 def test_another_long_almost_unsplittable_header(self):
566 eq = self.ndiffAssertEqual
567 hstr = """\
568bug demonstration
569\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
570\tmore text"""
571 h = Header(hstr, continuation_ws='\t')
572 eq(h.encode(), """\
573bug demonstration
574\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
575\tmore text""")
576 h = Header(hstr.replace('\t', ' '))
577 eq(h.encode(), """\
578bug demonstration
579 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
580 more text""")
581
582 def test_long_nonstring(self):
583 eq = self.ndiffAssertEqual
584 g = Charset("iso-8859-1")
585 cz = Charset("iso-8859-2")
586 utf8 = Charset("utf-8")
587 g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
588 b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
589 b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
590 b'bef\xf6rdert. ')
591 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
592 b'd\xf9vtipu.. ')
593 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
594 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
595 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
596 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
597 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
598 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
599 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
600 '\u3044\u307e\u3059\u3002')
601 h = Header(g_head, g, header_name='Subject')
602 h.append(cz_head, cz)
603 h.append(utf8_head, utf8)
604 msg = Message()
605 msg['Subject'] = h
606 sfp = StringIO()
607 g = Generator(sfp)
608 g.flatten(msg)
609 eq(sfp.getvalue(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000610Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
611 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
612 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
613 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
614 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
615 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
616 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
617 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
618 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
619 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
620 =?utf-8?b?44CC?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000621
622""")
Guido van Rossum9604e662007-08-30 03:46:43 +0000623 eq(h.encode(maxlinelen=76), """\
624=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
625 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
626 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
627 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
628 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
629 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
630 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
631 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
632 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
633 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
634 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000635
636 def test_long_header_encode(self):
637 eq = self.ndiffAssertEqual
638 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
639 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
640 header_name='X-Foobar-Spoink-Defrobnit')
641 eq(h.encode(), '''\
642wasnipoop; giraffes="very-long-necked-animals";
643 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
644
645 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
646 eq = self.ndiffAssertEqual
647 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
648 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
649 header_name='X-Foobar-Spoink-Defrobnit',
650 continuation_ws='\t')
651 eq(h.encode(), '''\
652wasnipoop; giraffes="very-long-necked-animals";
653 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
654
655 def test_long_header_encode_with_tab_continuation(self):
656 eq = self.ndiffAssertEqual
657 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
658 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
659 header_name='X-Foobar-Spoink-Defrobnit',
660 continuation_ws='\t')
661 eq(h.encode(), '''\
662wasnipoop; giraffes="very-long-necked-animals";
663\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
664
665 def test_header_splitter(self):
666 eq = self.ndiffAssertEqual
667 msg = MIMEText('')
668 # It'd be great if we could use add_header() here, but that doesn't
669 # guarantee an order of the parameters.
670 msg['X-Foobar-Spoink-Defrobnit'] = (
671 'wasnipoop; giraffes="very-long-necked-animals"; '
672 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
673 sfp = StringIO()
674 g = Generator(sfp)
675 g.flatten(msg)
676 eq(sfp.getvalue(), '''\
677Content-Type: text/plain; charset="us-ascii"
678MIME-Version: 1.0
679Content-Transfer-Encoding: 7bit
680X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
681 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
682
683''')
684
685 def test_no_semis_header_splitter(self):
686 eq = self.ndiffAssertEqual
687 msg = Message()
688 msg['From'] = 'test@dom.ain'
689 msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
690 msg.set_payload('Test')
691 sfp = StringIO()
692 g = Generator(sfp)
693 g.flatten(msg)
694 eq(sfp.getvalue(), """\
695From: test@dom.ain
696References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
697 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>
698
699Test""")
700
701 def test_no_split_long_header(self):
702 eq = self.ndiffAssertEqual
703 hstr = 'References: ' + 'x' * 80
Guido van Rossum9604e662007-08-30 03:46:43 +0000704 h = Header(hstr)
705 # These come on two lines because Headers are really field value
706 # classes and don't really know about their field names.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000707 eq(h.encode(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000708References:
709 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
710 h = Header('x' * 80)
711 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000712
713 def test_splitting_multiple_long_lines(self):
714 eq = self.ndiffAssertEqual
715 hstr = """\
716from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
717\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
718\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
719"""
720 h = Header(hstr, continuation_ws='\t')
721 eq(h.encode(), """\
722from babylon.socal-raves.org (localhost [127.0.0.1]);
723 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
724 for <mailman-admin@babylon.socal-raves.org>;
725 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
726\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
727 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
728 for <mailman-admin@babylon.socal-raves.org>;
729 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
730\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
731 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
732 for <mailman-admin@babylon.socal-raves.org>;
733 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")
734
735 def test_splitting_first_line_only_is_long(self):
736 eq = self.ndiffAssertEqual
737 hstr = """\
738from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
739\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
740\tid 17k4h5-00034i-00
741\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
742 h = Header(hstr, maxlinelen=78, header_name='Received',
743 continuation_ws='\t')
744 eq(h.encode(), """\
745from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
746 helo=cthulhu.gerg.ca)
747\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
748\tid 17k4h5-00034i-00
749\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")
750
751 def test_long_8bit_header(self):
752 eq = self.ndiffAssertEqual
753 msg = Message()
754 h = Header('Britische Regierung gibt', 'iso-8859-1',
755 header_name='Subject')
756 h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
Guido van Rossum9604e662007-08-30 03:46:43 +0000757 eq(h.encode(maxlinelen=76), """\
758=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
759 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000760 msg['Subject'] = h
Guido van Rossum9604e662007-08-30 03:46:43 +0000761 eq(msg.as_string(maxheaderlen=76), """\
762Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
763 =?iso-8859-1?q?hore-Windkraftprojekte?=
764
765""")
766 eq(msg.as_string(maxheaderlen=0), """\
767Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000768
769""")
770
771 def test_long_8bit_header_no_charset(self):
772 eq = self.ndiffAssertEqual
773 msg = Message()
Barry Warsaw8c571042007-08-30 19:17:18 +0000774 header_string = ('Britische Regierung gibt gr\xfcnes Licht '
775 'f\xfcr Offshore-Windkraftprojekte '
776 '<a-very-long-address@example.com>')
777 msg['Reply-To'] = header_string
778 self.assertRaises(UnicodeEncodeError, msg.as_string)
779 msg = Message()
780 msg['Reply-To'] = Header(header_string, 'utf-8',
781 header_name='Reply-To')
782 eq(msg.as_string(maxheaderlen=78), """\
783Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
784 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000785
786""")
787
788 def test_long_to_header(self):
789 eq = self.ndiffAssertEqual
790 to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
791 '<someone@eecs.umich.edu>,'
792 '"Someone Test #B" <someone@umich.edu>, '
793 '"Someone Test #C" <someone@eecs.umich.edu>, '
794 '"Someone Test #D" <someone@eecs.umich.edu>')
795 msg = Message()
796 msg['To'] = to
797 eq(msg.as_string(maxheaderlen=78), '''\
Guido van Rossum9604e662007-08-30 03:46:43 +0000798To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
Barry Warsaw70d61ce2009-03-30 23:12:30 +0000799 "Someone Test #B" <someone@umich.edu>,
Guido van Rossum9604e662007-08-30 03:46:43 +0000800 "Someone Test #C" <someone@eecs.umich.edu>,
801 "Someone Test #D" <someone@eecs.umich.edu>
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000802
803''')
804
805 def test_long_line_after_append(self):
806 eq = self.ndiffAssertEqual
807 s = 'This is an example of string which has almost the limit of header length.'
808 h = Header(s)
809 h.append('Add another line.')
Guido van Rossum9604e662007-08-30 03:46:43 +0000810 eq(h.encode(maxlinelen=76), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000811This is an example of string which has almost the limit of header length.
812 Add another line.""")
813
814 def test_shorter_line_with_append(self):
815 eq = self.ndiffAssertEqual
816 s = 'This is a shorter line.'
817 h = Header(s)
818 h.append('Add another sentence. (Surprise?)')
819 eq(h.encode(),
820 'This is a shorter line. Add another sentence. (Surprise?)')
821
822 def test_long_field_name(self):
823 eq = self.ndiffAssertEqual
824 fn = 'X-Very-Very-Very-Long-Header-Name'
Guido van Rossum9604e662007-08-30 03:46:43 +0000825 gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
826 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
827 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
828 'bef\xf6rdert. ')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000829 h = Header(gs, 'iso-8859-1', header_name=fn)
830 # BAW: this seems broken because the first line is too long
Guido van Rossum9604e662007-08-30 03:46:43 +0000831 eq(h.encode(maxlinelen=76), """\
832=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
833 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
834 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
835 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000836
837 def test_long_received_header(self):
838 h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
839 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
840 'Wed, 05 Mar 2003 18:10:18 -0700')
841 msg = Message()
842 msg['Received-1'] = Header(h, continuation_ws='\t')
843 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +0000844 # This should be splitting on spaces not semicolons.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000845 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Barry Warsawbef9d212007-08-31 10:55:37 +0000846Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
847 Wed, 05 Mar 2003 18:10:18 -0700
848Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
849 Wed, 05 Mar 2003 18:10:18 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000850
851""")
852
853 def test_string_headerinst_eq(self):
854 h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
855 'tu-muenchen.de> (David Bremner\'s message of '
856 '"Thu, 6 Mar 2003 13:58:21 +0100")')
857 msg = Message()
858 msg['Received-1'] = Header(h, header_name='Received-1',
859 continuation_ws='\t')
860 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +0000861 # XXX This should be splitting on spaces not commas.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000862 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Barry Warsawbef9d212007-08-31 10:55:37 +0000863Received-1: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner's message of \"Thu,
864 6 Mar 2003 13:58:21 +0100\")
865Received-2: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner's message of \"Thu,
866 6 Mar 2003 13:58:21 +0100\")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000867
868""")
869
870 def test_long_unbreakable_lines_with_continuation(self):
871 eq = self.ndiffAssertEqual
872 msg = Message()
873 t = """\
874iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
875 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
876 msg['Face-1'] = t
877 msg['Face-2'] = Header(t, header_name='Face-2')
Barry Warsawbef9d212007-08-31 10:55:37 +0000878 # XXX This splitting is all wrong. It the first value line should be
879 # snug against the field name.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000880 eq(msg.as_string(maxheaderlen=78), """\
Barry Warsawc5a6a302007-08-31 11:19:21 +0000881Face-1:\x20
Barry Warsaw70d61ce2009-03-30 23:12:30 +0000882 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000883 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Barry Warsawc5a6a302007-08-31 11:19:21 +0000884Face-2:\x20
Barry Warsawbef9d212007-08-31 10:55:37 +0000885 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000886 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
887
888""")
889
890 def test_another_long_multiline_header(self):
891 eq = self.ndiffAssertEqual
892 m = ('Received: from siimage.com '
893 '([172.25.1.3]) by zima.siliconimage.com with '
Guido van Rossum9604e662007-08-30 03:46:43 +0000894 'Microsoft SMTPSVC(5.0.2195.4905); '
895 'Wed, 16 Oct 2002 07:41:11 -0700')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000896 msg = email.message_from_string(m)
897 eq(msg.as_string(maxheaderlen=78), '''\
Barry Warsawbef9d212007-08-31 10:55:37 +0000898Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with Microsoft SMTPSVC(5.0.2195.4905);
899 Wed, 16 Oct 2002 07:41:11 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000900
901''')
902
903 def test_long_lines_with_different_header(self):
904 eq = self.ndiffAssertEqual
905 h = ('List-Unsubscribe: '
906 '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
907 ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
908 '?subject=unsubscribe>')
909 msg = Message()
910 msg['List'] = h
911 msg['List'] = Header(h, header_name='List')
912 eq(msg.as_string(maxheaderlen=78), """\
913List: List-Unsubscribe: <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +0000914 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000915List: List-Unsubscribe: <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +0000916 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000917
918""")
919
920
921
922# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    """Generator output with and without mangle_from_ for a 'From ' body line."""

    def setUp(self):
        message = Message()
        message['From'] = 'aaa@bbb.org'
        message.set_payload('From the desk of A.A.A.:\n'
                            'Blah blah blah\n')
        self.msg = message

    def _flatten(self, mangle):
        # Serialize self.msg through a Generator with the given
        # mangle_from_ setting and return the resulting text.
        sink = StringIO()
        Generator(sink, mangle_from_=mangle).flatten(self.msg)
        return sink.getvalue()

    def test_mangled_from(self):
        expected = ('From: aaa@bbb.org\n'
                    '\n'
                    '>From the desk of A.A.A.:\n'
                    'Blah blah blah\n')
        self.assertEqual(self._flatten(True), expected)

    def test_dont_mangle_from(self):
        expected = ('From: aaa@bbb.org\n'
                    '\n'
                    'From the desk of A.A.A.:\n'
                    'Blah blah blah\n')
        self.assertEqual(self._flatten(False), expected)
953
954
955
956# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    """Basic behaviour of MIMEAudio built from the bundled audiotest.au."""

    def setUp(self):
        # Make sure we pick up the audiotest.au that lives in email/test/data.
        # In Python, there's an audiotest.au living in Lib/test but that isn't
        # included in some binary distros that don't include the test
        # package.  The trailing empty string on the .join() is significant
        # since findfile() will do a dirname().
        datadir = os.path.join(os.path.dirname(landmark), 'data', '')
        with open(findfile('audiotest.au', datadir), 'rb') as fp:
            self._audiodata = fp.read()
        self._au = MIMEAudio(self._audiodata)

    def test_guess_minor_type(self):
        self.assertEqual(self._au.get_content_type(), 'audio/basic')

    def test_encoding(self):
        # The payload is base64 text; decoding it must give back the bytes
        # that were read from disk.
        payload = self._au.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._audiodata)

    def test_checkSetMinor(self):
        au = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(au.get_content_type(), 'audio/fish')

    def test_add_header(self):
        eq = self.assertEqual
        self._au.add_header('Content-Disposition', 'attachment',
                            filename='audiotest.au')
        eq(self._au['content-disposition'],
           'attachment; filename="audiotest.au"')
        eq(self._au.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'audiotest.au')])
        eq(self._au.get_param('filename', header='content-disposition'),
           'audiotest.au')
        # Unique sentinel: identity checks below prove that failobj itself
        # is handed back.  assertIs reports both operands on failure.
        missing = object()
        eq(self._au.get_param('attachment', header='content-disposition'), '')
        self.assertIs(self._au.get_param('foo', failobj=missing,
                                         header='content-disposition'),
                      missing)
        # Try some missing stuff
        self.assertIs(self._au.get_param('foobar', missing), missing)
        self.assertIs(self._au.get_param('attachment', missing,
                                         header='foobar'),
                      missing)
1000
1001
1002
1003# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    """Basic behaviour of MIMEImage built from the bundled PyBanner048.gif."""

    def setUp(self):
        with openfile('PyBanner048.gif', 'rb') as fp:
            self._imgdata = fp.read()
        self._im = MIMEImage(self._imgdata)

    def test_guess_minor_type(self):
        self.assertEqual(self._im.get_content_type(), 'image/gif')

    def test_encoding(self):
        # The payload is base64 text; decoding it must give back the bytes
        # that were read from disk.
        payload = self._im.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._imgdata)

    def test_checkSetMinor(self):
        im = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(im.get_content_type(), 'image/fish')

    def test_add_header(self):
        eq = self.assertEqual
        self._im.add_header('Content-Disposition', 'attachment',
                            filename='dingusfish.gif')
        eq(self._im['content-disposition'],
           'attachment; filename="dingusfish.gif"')
        eq(self._im.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'dingusfish.gif')])
        eq(self._im.get_param('filename', header='content-disposition'),
           'dingusfish.gif')
        # Unique sentinel: identity checks below prove that failobj itself
        # is handed back.  assertIs reports both operands on failure.
        missing = object()
        eq(self._im.get_param('attachment', header='content-disposition'), '')
        self.assertIs(self._im.get_param('foo', failobj=missing,
                                         header='content-disposition'),
                      missing)
        # Try some missing stuff
        self.assertIs(self._im.get_param('foobar', missing), missing)
        self.assertIs(self._im.get_param('attachment', missing,
                                         header='foobar'),
                      missing)
1041
1042
1043
1044# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    """Basic behaviour of MIMEApplication for a small binary payload."""

    def test_headers(self):
        eq = self.assertEqual
        msg = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        eq(msg.get_content_type(), 'application/octet-stream')
        eq(msg['content-transfer-encoding'], 'base64')

    def test_body(self):
        eq = self.assertEqual
        # Named "data" rather than "bytes" so the builtin is not shadowed.
        data = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(data)
        # Surrounding whitespace in a base64 body carries no meaning, so it
        # is stripped before comparing the encoded text.
        eq(msg.get_payload().strip(), '+vv8/f7/')
        eq(msg.get_payload(decode=True), data)
1058
1059
1060
1061# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    """Basic behaviour of MIMEText: content type, charset and payload."""

    def setUp(self):
        self._msg = MIMEText('hello there')

    def test_types(self):
        eq = self.assertEqual
        eq(self._msg.get_content_type(), 'text/plain')
        eq(self._msg.get_param('charset'), 'us-ascii')
        # Unique sentinel so the identity checks prove failobj is returned.
        missing = object()
        self.assertIs(self._msg.get_param('foobar', missing), missing)
        self.assertIs(self._msg.get_param('charset', missing,
                                          header='foobar'),
                      missing)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there')
        eq(msg.get_charset(), 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # assertIn shows both the needle and the haystack on failure.
        self.assertIn('hello there', msg.as_string())

    def test_utf8_input(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        eq = self.assertEqual
        msg = MIMEText(teststr, _charset='utf-8')
        eq(msg.get_charset().output_charset, 'utf-8')
        eq(msg['content-type'], 'text/plain; charset="utf-8"')
        eq(msg.get_payload(decode=True), teststr.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
        "will fix in email6")
    def test_utf8_input_no_charset(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
1112
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001113
1114
1115# Test complicated multipart/* messages
class TestMultipart(TestEmailBase):
    """Construction, generation and parsing of multipart/* messages."""

    def setUp(self):
        with openfile('PyBanner048.gif', 'rb') as fp:
            data = fp.read()
        container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
        image = MIMEImage(data, name='dingusfish.gif')
        image.add_header('content-disposition', 'attachment',
                         filename='dingusfish.gif')
        intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
        container.attach(intro)
        container.attach(image)
        container['From'] = 'Barry <barry@digicool.com>'
        container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
        container['Subject'] = 'Here is your dingus fish'

        now = 987809702.54848599
        timetuple = time.localtime(now)
        # The last struct_time field is the DST flag: 0 selects the standard
        # offset, anything else the alternate (DST) offset.
        if timetuple[-1] == 0:
            tzsecs = time.timezone
        else:
            tzsecs = time.altzone
        # time.timezone/altzone count seconds *west* of UTC, hence the
        # inverted sign.
        sign = '-' if tzsecs > 0 else '+'
        # Build +/-HHMM from the magnitude.  Dividing the signed value by 36
        # gave wrong minutes for offsets that are not whole hours and a
        # doubled sign for zones east of UTC.
        hours, minutes = divmod(abs(tzsecs) // 60, 60)
        tzoffset = ' %s%02d%02d' % (sign, hours, minutes)
        container['Date'] = time.strftime(
            '%a, %d %b %Y %H:%M:%S', timetuple) + tzoffset
        self._msg = container
        self._im = image
        self._txt = intro

    def _make_outer(self):
        # Bare multipart/mixed container carrying the three headers shared
        # by the generator tests below.
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        return outer

    def test_hierarchy(self):
        # convenience
        eq = self.assertEqual
        raises = self.assertRaises
        # tests
        m = self._msg
        self.assertTrue(m.is_multipart())
        eq(m.get_content_type(), 'multipart/mixed')
        eq(len(m.get_payload()), 2)
        raises(IndexError, m.get_payload, 2)
        m0 = m.get_payload(0)
        m1 = m.get_payload(1)
        # The very objects attached in setUp must come back, in order.
        self.assertIs(m0, self._txt)
        self.assertIs(m1, self._im)
        eq(m.get_payload(), [m0, m1])
        self.assertFalse(m0.is_multipart())
        self.assertFalse(m1.is_multipart())

    def test_empty_multipart_idempotent(self):
        text = """\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY


--BOUNDARY--
"""
        msg = Parser().parsestr(text)
        self.ndiffAssertEqual(text, msg.as_string())

    def test_no_parts_in_a_multipart_with_none_epilogue(self):
        outer = self._make_outer()
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY

--BOUNDARY--''')

    def test_no_parts_in_a_multipart_with_empty_epilogue(self):
        outer = self._make_outer()
        outer.preamble = ''
        outer.epilogue = ''
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY

--BOUNDARY--
''')

    def test_one_part_in_a_multipart(self):
        eq = self.ndiffAssertEqual
        outer = self._make_outer()
        outer.set_boundary('BOUNDARY')
        msg = MIMEText('hello world')
        outer.attach(msg)
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_preamble(self):
        eq = self.ndiffAssertEqual
        outer = self._make_outer()
        outer.preamble = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_preamble(self):
        eq = self.ndiffAssertEqual
        outer = self._make_outer()
        outer.preamble = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = self._make_outer()
        outer.epilogue = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = self._make_outer()
        outer.epilogue = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = self._make_outer()
        outer.epilogue = '\n'
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--

''')

    def test_message_external_body(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_36.txt')
        eq(len(msg.get_payload()), 2)
        msg1 = msg.get_payload(1)
        eq(msg1.get_content_type(), 'multipart/alternative')
        eq(len(msg1.get_payload()), 2)
        for subpart in msg1.get_payload():
            eq(subpart.get_content_type(), 'message/external-body')
            eq(len(subpart.get_payload()), 1)
            subsubpart = subpart.get_payload(0)
            eq(subsubpart.get_content_type(), 'text/plain')

    def test_double_boundary(self):
        # msg_37.txt is a multipart that contains two dash-boundary's in a
        # row.  Our interpretation of RFC 2046 calls for ignoring the second
        # and subsequent boundaries.
        msg = self._msgobj('msg_37.txt')
        self.assertEqual(len(msg.get_payload()), 3)

    def test_nested_inner_contains_outer_boundary(self):
        eq = self.ndiffAssertEqual
        # msg_38.txt has an inner part that contains outer boundaries.  My
        # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) says
        # these are illegal and should be interpreted as unterminated inner
        # parts.
        msg = self._msgobj('msg_38.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): nesting below assumes 4 spaces per level in the
        # _structure output -- confirm against msg_38.txt.
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
            text/plain
        text/plain
    text/plain
    text/plain
""")

    def test_nested_with_same_boundary(self):
        eq = self.ndiffAssertEqual
        # msg_39.txt is similarly evil in that it's got inner parts that use
        # the same boundary as outer parts.  Again, I believe the way this is
        # parsed is closest to the spirit of RFC 2046
        msg = self._msgobj('msg_39.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): nesting below assumes 4 spaces per level in the
        # _structure output -- confirm against msg_39.txt.
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
        application/octet-stream
        application/octet-stream
    text/plain
""")

    def test_boundary_in_non_multipart(self):
        msg = self._msgobj('msg_40.txt')
        self.assertEqual(msg.as_string(), '''\
MIME-Version: 1.0
Content-Type: text/html; boundary="--961284236552522269"

----961284236552522269
Content-Type: text/html;
Content-Transfer-Encoding: 7Bit

<html></html>

----961284236552522269--
''')

    def test_boundary_with_leading_space(self):
        eq = self.assertEqual
        msg = email.message_from_string('''\
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary=" XXXX"

-- XXXX
Content-Type: text/plain


-- XXXX
Content-Type: text/plain

-- XXXX--
''')
        self.assertTrue(msg.is_multipart())
        eq(msg.get_boundary(), ' XXXX')
        eq(len(msg.get_payload()), 2)

    def test_boundary_without_trailing_newline(self):
        m = Parser().parsestr("""\
Content-Type: multipart/mixed; boundary="===============0012394164=="
MIME-Version: 1.0

--===============0012394164==
Content-Type: image/file1.jpg
MIME-Version: 1.0
Content-Transfer-Encoding: base64

YXNkZg==
--===============0012394164==--""")
        # assertEqual, not the deprecated assertEquals alias.
        self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
1487
1488
1489
1490# Test some badly formatted messages
class TestNonConformant(TestEmailBase):
    """Parsing and regeneration of badly formatted messages."""

    def test_parse_missing_minor_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_14.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')

    def test_same_boundary_inner_outer(self):
        msg = self._msgobj('msg_15.txt')
        # XXX We can probably eventually do better
        inner = msg.get_payload(0)
        self.assertTrue(hasattr(inner, 'defects'))
        self.assertEqual(len(inner.defects), 1)
        # assertIsInstance reports the actual object on failure.
        self.assertIsInstance(inner.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    def test_multipart_no_boundary(self):
        msg = self._msgobj('msg_25.txt')
        self.assertIsInstance(msg.get_payload(), str)
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    def test_invalid_content_type(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        msg = Message()
        # RFC 2045, $5.2 says invalid yields text/plain
        msg['Content-Type'] = 'text'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Clear the old value and try something /really/ invalid
        del msg['content-type']
        msg['Content-Type'] = 'foo'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Still, make sure that the message is idempotently generated
        s = StringIO()
        g = Generator(s)
        g.flatten(msg)
        neq(s.getvalue(), 'Content-Type: foo\n\n')

    def test_no_start_boundary(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_31.txt')
        eq(msg.get_payload(), """\
--BOUNDARY
Content-Type: text/plain

message 1

--BOUNDARY
Content-Type: text/plain

message 2

--BOUNDARY--
""")

    def test_no_separating_blank_line(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_35.txt')
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: here's something interesting

counter to RFC 2822, there's no separating newline here
""")

    def test_lying_multipart(self):
        msg = self._msgobj('msg_41.txt')
        self.assertTrue(hasattr(msg, 'defects'))
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    def test_missing_start_boundary(self):
        outer = self._msgobj('msg_42.txt')
        # The message structure is:
        #
        # multipart/mixed
        #    text/plain
        #    message/rfc822
        #        multipart/mixed [*]
        #
        # [*] This message is missing its start boundary
        bad = outer.get_payload(1).get_payload(0)
        self.assertEqual(len(bad.defects), 1)
        self.assertIsInstance(bad.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    def test_first_line_is_continuation_header(self):
        eq = self.assertEqual
        m = ' Line 1\nLine 2\nLine 3'
        msg = email.message_from_string(m)
        eq(msg.keys(), [])
        eq(msg.get_payload(), 'Line 2\nLine 3')
        eq(len(msg.defects), 1)
        self.assertIsInstance(msg.defects[0],
                              errors.FirstHeaderLineIsContinuationDefect)
        eq(msg.defects[0].line, ' Line 1\n')
1601
1602
1603
1604# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    # Checks of decode_header() / make_header() on RFC 2047 encoded-words.

    def test_rfc2047_multiline(self):
        # Decode a folded header holding two encoded-words, rebuild a Header
        # from the decoded chunks and re-encode it with a 76 column limit.
        source = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
        decoded = decode_header(source)
        self.assertEqual(decoded, [
            (b'Re:', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            (b'baz foo bar', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
        rebuilt = make_header(decoded)
        self.assertEqual(
            str(rebuilt),
            'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        self.ndiffAssertEqual(rebuilt.encode(maxlinelen=76), """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""")

    def test_whitespace_eater_unicode(self):
        # An encoded-word followed by plain text decodes to two chunks which
        # make_header() joins back into a single readable string.
        source = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        decoded = decode_header(source)
        self.assertEqual(decoded, [(b'Andr\xe9', 'iso-8859-1'),
                                   (b'Pirard <pirard@dom.ain>', None)])
        as_text = str(make_header(decoded))
        self.assertEqual(as_text, 'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_eater_unicode_2(self):
        # Plain text interleaved with two base64 encoded-words.
        source = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?='
        decoded = decode_header(source)
        self.assertEqual(decoded, [
            (b'The', None), (b'quick brown fox', 'iso-8859-1'),
            (b'jumped over the', None), (b'lazy dog', 'iso-8859-1')])
        as_text = str(make_header(decoded))
        self.assertEqual(as_text,
                         'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        # Without whitespace around the encoded-words the input is expected
        # back unchanged as one undecoded chunk.
        source = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        self.assertEqual(decode_header(source), [(source, None)])

    def test_rfc2047_with_whitespace(self):
        # The same words separated by whitespace are decoded individually.
        source = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        self.assertEqual(decode_header(source), [
            (b'Sm', None), (b'\xf6', 'iso-8859-1'),
            (b'rg', None), (b'\xe5', 'iso-8859-1'),
            (b'sbord', None)])

    def test_rfc2047_B_bad_padding(self):
        # Base64 encoded-words with missing or short '=' padding must still
        # decode to the expected bytes.
        template = '=?iso-8859-1?B?%s?='
        # only test complete bytes
        cases = [
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi'),
        ]
        for encoded, raw in cases:
            decoded = decode_header(template % encoded)
            self.assertEqual(decoded, [(raw, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004.
        source = '=?iso-8659-1?Q?andr=e9=zz?='
        expected = [(b'andr\xe9=zz', 'iso-8659-1')]
        self.assertEqual(decode_header(source), expected)
1668
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001669
1670# Test the MIMEMessage class
class TestMIMEMessage(TestEmailBase):
    # Tests for MIMEMessage construction, for generating and parsing
    # message/* containers, and for default content types inside digests.

    def setUp(self):
        # Keep the raw text of msg_11.txt on the instance.
        with openfile('msg_11.txt') as fp:
            self._text = fp.read()

    def test_type_error(self):
        # A plain string is not an acceptable MIMEMessage payload.
        self.assertRaises(TypeError, MIMEMessage, 'a plain string')

    def test_valid_argument(self):
        # Wrapping a Message yields a message/rfc822 part whose payload is a
        # one-element list holding that very same Message object.
        eq = self.assertEqual
        subject = 'A sub-message'
        m = Message()
        m['Subject'] = subject
        r = MIMEMessage(m)
        eq(r.get_content_type(), 'message/rfc822')
        payload = r.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subpart = payload[0]
        self.assertIs(subpart, m)
        eq(subpart['subject'], subject)

    def test_bad_multipart(self):
        # Attaching a second message to a MIMEMessage must be refused.
        msg1 = Message()
        msg1['Subject'] = 'subpart 1'
        msg2 = Message()
        msg2['Subject'] = 'subpart 2'
        r = MIMEMessage(msg1)
        self.assertRaises(errors.MultipartConversionError, r.attach, msg2)

    def test_generate(self):
        # First craft the message to be encapsulated
        m = Message()
        m['Subject'] = 'An enclosed message'
        m.set_payload('Here is the body of the message.\n')
        r = MIMEMessage(m)
        r['Subject'] = 'The enclosing message'
        s = StringIO()
        g = Generator(s)
        g.flatten(r)
        self.assertEqual(s.getvalue(), """\
Content-Type: message/rfc822
MIME-Version: 1.0
Subject: The enclosing message

Subject: An enclosed message

Here is the body of the message.
""")

    def test_parse_message_rfc822(self):
        # Parsing msg_11.txt gives a message/rfc822 container with exactly
        # one enclosed Message.
        eq = self.assertEqual
        msg = self._msgobj('msg_11.txt')
        eq(msg.get_content_type(), 'message/rfc822')
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        submsg = payload[0]
        self.assertIsInstance(submsg, Message)
        eq(submsg['subject'], 'An enclosed message')
        eq(submsg.get_payload(), 'Here is the body of the message.\n')

    def test_dsn(self):
        eq = self.assertEqual
        unless = self.assertTrue
        # msg 16 is a Delivery Status Notification, see RFC 1894
        msg = self._msgobj('msg_16.txt')
        eq(msg.get_content_type(), 'multipart/report')
        unless(msg.is_multipart())
        eq(len(msg.get_payload()), 3)
        # Subpart 1 is a text/plain, human readable section
        subpart = msg.get_payload(0)
        eq(subpart.get_content_type(), 'text/plain')
        eq(subpart.get_payload(), """\
This report relates to a message you sent with the following header fields:

 Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
 Date: Sun, 23 Sep 2001 20:10:55 -0700
 From: "Ian T. Henry" <henryi@oxy.edu>
 To: SoCal Raves <scr@socal-raves.org>
 Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

 Recipient address: jangel1@cougar.noc.ucla.edu
 Reason: recipient reached disk quota

""")
        # Subpart 2 contains the machine parsable DSN information.  It
        # consists of two blocks of headers, represented by two nested Message
        # objects.
        subpart = msg.get_payload(1)
        eq(subpart.get_content_type(), 'message/delivery-status')
        eq(len(subpart.get_payload()), 2)
        # message/delivery-status should treat each block as a bunch of
        # headers, i.e. a bunch of Message objects.
        dsn1 = subpart.get_payload(0)
        self.assertIsInstance(dsn1, Message)
        eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
        eq(dsn1.get_param('dns', header='reporting-mta'), '')
        # Try a missing one <wink>
        eq(dsn1.get_param('nsd', header='reporting-mta'), None)
        dsn2 = subpart.get_payload(1)
        self.assertIsInstance(dsn2, Message)
        eq(dsn2['action'], 'failed')
        eq(dsn2.get_params(header='original-recipient'),
           [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
        eq(dsn2.get_param('rfc822', header='final-recipient'), '')
        # Subpart 3 is the original message
        subpart = msg.get_payload(2)
        eq(subpart.get_content_type(), 'message/rfc822')
        payload = subpart.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subsubpart = payload[0]
        self.assertIsInstance(subsubpart, Message)
        eq(subsubpart.get_content_type(), 'text/plain')
        eq(subsubpart['message-id'],
           '<002001c144a6$8752e060$56104586@oxy.edu>')

    def test_epilogue(self):
        # Build a two-part multipart with a preamble and an epilogue by hand
        # and compare the generated text with the contents of msg_21.txt.
        eq = self.ndiffAssertEqual
        with openfile('msg_21.txt') as fp:
            text = fp.read()
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = 'End of MIME message\n'
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), text)

    def test_no_nl_preamble(self):
        # The preamble has no trailing newline and the epilogue is empty;
        # the expected text below still has each on a line of its own.
        eq = self.ndiffAssertEqual
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = ''
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
""")

    def test_default_type(self):
        # In msg_30.txt both containers report message/rfc822 as default and
        # actual type, and their first subparts report text/plain.
        eq = self.assertEqual
        with openfile('msg_30.txt') as fp:
            msg = email.message_from_file(fp)
        container1 = msg.get_payload(0)
        eq(container1.get_default_type(), 'message/rfc822')
        eq(container1.get_content_type(), 'message/rfc822')
        container2 = msg.get_payload(1)
        eq(container2.get_default_type(), 'message/rfc822')
        eq(container2.get_content_type(), 'message/rfc822')
        container1a = container1.get_payload(0)
        eq(container1a.get_default_type(), 'text/plain')
        eq(container1a.get_content_type(), 'text/plain')
        container2a = container2.get_payload(0)
        eq(container2a.get_default_type(), 'text/plain')
        eq(container2a.get_content_type(), 'text/plain')

    def test_default_type_with_explicit_container_type(self):
        # Same expectations as test_default_type, checked against msg_28.txt.
        eq = self.assertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        container1 = msg.get_payload(0)
        eq(container1.get_default_type(), 'message/rfc822')
        eq(container1.get_content_type(), 'message/rfc822')
        container2 = msg.get_payload(1)
        eq(container2.get_default_type(), 'message/rfc822')
        eq(container2.get_content_type(), 'message/rfc822')
        container1a = container1.get_payload(0)
        eq(container1a.get_default_type(), 'text/plain')
        eq(container1a.get_content_type(), 'text/plain')
        container2a = container2.get_payload(0)
        eq(container2a.get_default_type(), 'text/plain')
        eq(container2a.get_content_type(), 'text/plain')

    def test_default_type_non_parsed(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # Set up container
        container = MIMEMultipart('digest', 'BOUNDARY')
        container.epilogue = ''
        # Set up subparts
        subpart1a = MIMEText('message 1\n')
        subpart2a = MIMEText('message 2\n')
        subpart1 = MIMEMessage(subpart1a)
        subpart2 = MIMEMessage(subpart2a)
        container.attach(subpart1)
        container.attach(subpart2)
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
        # With the explicit headers removed, the subparts must still report
        # message/rfc822 and the generated text simply omits those headers.
        del subpart1['content-type']
        del subpart1['mime-version']
        del subpart2['content-type']
        del subpart2['mime-version']
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')

    def test_mime_attachments_in_constructor(self):
        # Subparts handed to the constructor become the payload, in order.
        eq = self.assertEqual
        text1 = MIMEText('')
        text2 = MIMEText('')
        msg = MIMEMultipart(_subparts=(text1, text2))
        eq(len(msg.get_payload()), 2)
        eq(msg.get_payload(0), text1)
        eq(msg.get_payload(1), text2)

    def test_default_multipart_constructor(self):
        # A MIMEMultipart created without arguments is already multipart.
        msg = MIMEMultipart()
        self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001969
1970
1971# A general test of parser->model->generator idempotency. IOW, read a message
1972# in, parse it into a message object tree, then without touching the tree,
1973# regenerate the plain text. The original text and the transformed text
# should be identical.  Note that we ignore the Unix-From since that may
# contain a changed date.
class TestIdempotent(TestEmailBase):
    # Each test parses a data file and checks that regenerating the message
    # without header wrapping reproduces the original text.

    def _msgobj(self, filename):
        # Return both the parsed message and the raw text it was parsed from.
        with openfile(filename) as fp:
            data = fp.read()
        msg = email.message_from_string(data)
        return msg, data

    def _idempotent(self, msg, text):
        # Flatten msg with header wrapping disabled (maxheaderlen=0) and
        # compare the result with the original text.
        eq = self.ndiffAssertEqual
        s = StringIO()
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg)
        eq(text, s.getvalue())

    def test_parse_text_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_01.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_params()[1], ('charset', 'us-ascii'))
        eq(msg.get_param('charset'), 'us-ascii')
        self.assertIsNone(msg.preamble)
        self.assertIsNone(msg.epilogue)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        # msg_03.txt is reported as text/plain but exposes no parameters.
        eq = self.assertEqual
        msg, text = self._msgobj('msg_03.txt')
        eq(msg.get_content_type(), 'text/plain')
        self.assertIsNone(msg.get_params())
        self.assertIsNone(msg.get_param('charset'))
        self._idempotent(msg, text)

    def test_simple_multipart(self):
        msg, text = self._msgobj('msg_04.txt')
        self._idempotent(msg, text)

    def test_MIME_digest(self):
        msg, text = self._msgobj('msg_02.txt')
        self._idempotent(msg, text)

    def test_long_header(self):
        msg, text = self._msgobj('msg_27.txt')
        self._idempotent(msg, text)

    def test_MIME_digest_with_part_headers(self):
        msg, text = self._msgobj('msg_28.txt')
        self._idempotent(msg, text)

    def test_mixed_with_image(self):
        msg, text = self._msgobj('msg_06.txt')
        self._idempotent(msg, text)

    def test_multipart_report(self):
        msg, text = self._msgobj('msg_05.txt')
        self._idempotent(msg, text)

    def test_dsn(self):
        msg, text = self._msgobj('msg_16.txt')
        self._idempotent(msg, text)

    def test_preamble_epilogue(self):
        msg, text = self._msgobj('msg_21.txt')
        self._idempotent(msg, text)

    def test_multipart_one_part(self):
        msg, text = self._msgobj('msg_23.txt')
        self._idempotent(msg, text)

    def test_multipart_no_parts(self):
        msg, text = self._msgobj('msg_24.txt')
        self._idempotent(msg, text)

    def test_no_start_boundary(self):
        msg, text = self._msgobj('msg_31.txt')
        self._idempotent(msg, text)

    def test_rfc2231_charset(self):
        msg, text = self._msgobj('msg_32.txt')
        self._idempotent(msg, text)

    def test_more_rfc2231_parameters(self):
        msg, text = self._msgobj('msg_33.txt')
        self._idempotent(msg, text)

    def test_text_plain_in_a_multipart_digest(self):
        msg, text = self._msgobj('msg_34.txt')
        self._idempotent(msg, text)

    def test_nested_multipart_mixeds(self):
        msg, text = self._msgobj('msg_12a.txt')
        self._idempotent(msg, text)

    def test_message_external_body_idempotent(self):
        msg, text = self._msgobj('msg_36.txt')
        self._idempotent(msg, text)

    def test_message_signed_idempotent(self):
        msg, text = self._msgobj('msg_45.txt')
        self._idempotent(msg, text)

    def test_content_type(self):
        eq = self.assertEqual
        # Parse the multipart/report sample; the raw text is not needed here.
        msg, _ = self._msgobj('msg_05.txt')
        eq(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(msg.get_params())
        eq(params['report-type'], 'delivery-status')
        eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        eq(msg.preamble, 'This is a MIME-encapsulated message.\n')
        eq(msg.epilogue, '\n')
        eq(len(msg.get_payload()), 3)
        # Make sure the subparts are what we expect
        msg1 = msg.get_payload(0)
        eq(msg1.get_content_type(), 'text/plain')
        eq(msg1.get_payload(), 'Yadda yadda yadda\n')
        msg2 = msg.get_payload(1)
        eq(msg2.get_content_type(), 'text/plain')
        eq(msg2.get_payload(), 'Yadda yadda yadda\n')
        msg3 = msg.get_payload(2)
        eq(msg3.get_content_type(), 'message/rfc822')
        self.assertIsInstance(msg3, Message)
        payload = msg3.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg4 = payload[0]
        self.assertIsInstance(msg4, Message)
        eq(msg4.get_payload(), 'Yadda yadda yadda\n')

    def test_parser(self):
        eq = self.assertEqual
        msg, _ = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        eq(msg.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and that
        # that submessage has a type of text/plain
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg1 = payload[0]
        self.assertIsInstance(msg1, Message)
        eq(msg1.get_content_type(), 'text/plain')
        self.assertIsInstance(msg1.get_payload(), str)
        eq(msg1.get_payload(), '\n')
2126
2127
2128
2129# Test various other bits of the package's functionality
2130class TestMiscellaneous(TestEmailBase):
2131 def test_message_from_string(self):
2132 with openfile('msg_01.txt') as fp:
2133 text = fp.read()
2134 msg = email.message_from_string(text)
2135 s = StringIO()
2136 # Don't wrap/continue long headers since we're trying to test
2137 # idempotency.
2138 g = Generator(s, maxheaderlen=0)
2139 g.flatten(msg)
2140 self.assertEqual(text, s.getvalue())
2141
2142 def test_message_from_file(self):
2143 with openfile('msg_01.txt') as fp:
2144 text = fp.read()
2145 fp.seek(0)
2146 msg = email.message_from_file(fp)
2147 s = StringIO()
2148 # Don't wrap/continue long headers since we're trying to test
2149 # idempotency.
2150 g = Generator(s, maxheaderlen=0)
2151 g.flatten(msg)
2152 self.assertEqual(text, s.getvalue())
2153
2154 def test_message_from_string_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002155 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002156 with openfile('msg_01.txt') as fp:
2157 text = fp.read()
2158
2159 # Create a subclass
2160 class MyMessage(Message):
2161 pass
2162
2163 msg = email.message_from_string(text, MyMessage)
2164 unless(isinstance(msg, MyMessage))
2165 # Try something more complicated
2166 with openfile('msg_02.txt') as fp:
2167 text = fp.read()
2168 msg = email.message_from_string(text, MyMessage)
2169 for subpart in msg.walk():
2170 unless(isinstance(subpart, MyMessage))
2171
2172 def test_message_from_file_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002173 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002174 # Create a subclass
2175 class MyMessage(Message):
2176 pass
2177
2178 with openfile('msg_01.txt') as fp:
2179 msg = email.message_from_file(fp, MyMessage)
2180 unless(isinstance(msg, MyMessage))
2181 # Try something more complicated
2182 with openfile('msg_02.txt') as fp:
2183 msg = email.message_from_file(fp, MyMessage)
2184 for subpart in msg.walk():
2185 unless(isinstance(subpart, MyMessage))
2186
2187 def test__all__(self):
2188 module = __import__('email')
2189 # Can't use sorted() here due to Python 2.3 compatibility
2190 all = module.__all__[:]
2191 all.sort()
2192 self.assertEqual(all, [
2193 'base64mime', 'charset', 'encoders', 'errors', 'generator',
R. David Murray96fd54e2010-10-08 15:55:28 +00002194 'header', 'iterators', 'message', 'message_from_binary_file',
2195 'message_from_bytes', 'message_from_file',
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002196 'message_from_string', 'mime', 'parser',
2197 'quoprimime', 'utils',
2198 ])
2199
2200 def test_formatdate(self):
2201 now = time.time()
2202 self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
2203 time.gmtime(now)[:6])
2204
2205 def test_formatdate_localtime(self):
2206 now = time.time()
2207 self.assertEqual(
2208 utils.parsedate(utils.formatdate(now, localtime=True))[:6],
2209 time.localtime(now)[:6])
2210
2211 def test_formatdate_usegmt(self):
2212 now = time.time()
2213 self.assertEqual(
2214 utils.formatdate(now, localtime=False),
2215 time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
2216 self.assertEqual(
2217 utils.formatdate(now, localtime=False, usegmt=True),
2218 time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
2219
2220 def test_parsedate_none(self):
2221 self.assertEqual(utils.parsedate(''), None)
2222
2223 def test_parsedate_compact(self):
2224 # The FWS after the comma is optional
2225 self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
2226 utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
2227
2228 def test_parsedate_no_dayofweek(self):
2229 eq = self.assertEqual
2230 eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
2231 (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
2232
2233 def test_parsedate_compact_no_dayofweek(self):
2234 eq = self.assertEqual
2235 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
2236 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2237
2238 def test_parsedate_acceptable_to_time_functions(self):
2239 eq = self.assertEqual
2240 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
2241 t = int(time.mktime(timetup))
2242 eq(time.localtime(t)[:6], timetup[:6])
2243 eq(int(time.strftime('%Y', timetup)), 2003)
2244 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
2245 t = int(time.mktime(timetup[:9]))
2246 eq(time.localtime(t)[:6], timetup[:6])
2247 eq(int(time.strftime('%Y', timetup[:9])), 2003)
2248
R. David Murray219d1c82010-08-25 00:45:55 +00002249 def test_parsedate_y2k(self):
2250 """Test for parsing a date with a two-digit year.
2251
2252 Parsing a date with a two-digit year should return the correct
2253 four-digit year. RFC822 allows two-digit years, but RFC2822 (which
2254 obsoletes RFC822) requires four-digit years.
2255
2256 """
2257 self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
2258 utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
2259 self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
2260 utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
2261
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002262 def test_parseaddr_empty(self):
2263 self.assertEqual(utils.parseaddr('<>'), ('', ''))
2264 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
2265
2266 def test_noquote_dump(self):
2267 self.assertEqual(
2268 utils.formataddr(('A Silly Person', 'person@dom.ain')),
2269 'A Silly Person <person@dom.ain>')
2270
2271 def test_escape_dump(self):
2272 self.assertEqual(
2273 utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
2274 r'"A \(Very\) Silly Person" <person@dom.ain>')
2275 a = r'A \(Special\) Person'
2276 b = 'person@dom.ain'
2277 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2278
2279 def test_escape_backslashes(self):
2280 self.assertEqual(
2281 utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
2282 r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
2283 a = r'Arthur \Backslash\ Foobar'
2284 b = 'person@dom.ain'
2285 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2286
2287 def test_name_with_dot(self):
2288 x = 'John X. Doe <jxd@example.com>'
2289 y = '"John X. Doe" <jxd@example.com>'
2290 a, b = ('John X. Doe', 'jxd@example.com')
2291 self.assertEqual(utils.parseaddr(x), (a, b))
2292 self.assertEqual(utils.parseaddr(y), (a, b))
2293 # formataddr() quotes the name if there's a dot in it
2294 self.assertEqual(utils.formataddr((a, b)), y)
2295
R. David Murray5397e862010-10-02 15:58:26 +00002296 def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
2297 # issue 10005. Note that in the third test the second pair of
2298 # backslashes is not actually a quoted pair because it is not inside a
2299 # comment or quoted string: the address being parsed has a quoted
2300 # string containing a quoted backslash, followed by 'example' and two
2301 # backslashes, followed by another quoted string containing a space and
2302 # the word 'example'. parseaddr copies those two backslashes
2303 # literally. Per rfc5322 this is not technically correct since a \ may
2304 # not appear in an address outside of a quoted string. It is probably
2305 # a sensible Postel interpretation, though.
2306 eq = self.assertEqual
2307 eq(utils.parseaddr('""example" example"@example.com'),
2308 ('', '""example" example"@example.com'))
2309 eq(utils.parseaddr('"\\"example\\" example"@example.com'),
2310 ('', '"\\"example\\" example"@example.com'))
2311 eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
2312 ('', '"\\\\"example\\\\" example"@example.com'))
2313
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002314 def test_multiline_from_comment(self):
2315 x = """\
2316Foo
2317\tBar <foo@example.com>"""
2318 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
2319
2320 def test_quote_dump(self):
2321 self.assertEqual(
2322 utils.formataddr(('A Silly; Person', 'person@dom.ain')),
2323 r'"A Silly; Person" <person@dom.ain>')
2324
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002325 def test_charset_richcomparisons(self):
2326 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002327 ne = self.assertNotEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002328 cset1 = Charset()
2329 cset2 = Charset()
2330 eq(cset1, 'us-ascii')
2331 eq(cset1, 'US-ASCII')
2332 eq(cset1, 'Us-AsCiI')
2333 eq('us-ascii', cset1)
2334 eq('US-ASCII', cset1)
2335 eq('Us-AsCiI', cset1)
2336 ne(cset1, 'usascii')
2337 ne(cset1, 'USASCII')
2338 ne(cset1, 'UsAsCiI')
2339 ne('usascii', cset1)
2340 ne('USASCII', cset1)
2341 ne('UsAsCiI', cset1)
2342 eq(cset1, cset2)
2343 eq(cset2, cset1)
2344
2345 def test_getaddresses(self):
2346 eq = self.assertEqual
2347 eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
2348 'Bud Person <bperson@dom.ain>']),
2349 [('Al Person', 'aperson@dom.ain'),
2350 ('Bud Person', 'bperson@dom.ain')])
2351
2352 def test_getaddresses_nasty(self):
2353 eq = self.assertEqual
2354 eq(utils.getaddresses(['foo: ;']), [('', '')])
2355 eq(utils.getaddresses(
2356 ['[]*-- =~$']),
2357 [('', ''), ('', ''), ('', '*--')])
2358 eq(utils.getaddresses(
2359 ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
2360 [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
2361
2362 def test_getaddresses_embedded_comment(self):
2363 """Test proper handling of a nested comment"""
2364 eq = self.assertEqual
2365 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
2366 eq(addrs[0][1], 'foo@bar.com')
2367
2368 def test_utils_quote_unquote(self):
2369 eq = self.assertEqual
2370 msg = Message()
2371 msg.add_header('content-disposition', 'attachment',
2372 filename='foo\\wacky"name')
2373 eq(msg.get_filename(), 'foo\\wacky"name')
2374
2375 def test_get_body_encoding_with_bogus_charset(self):
2376 charset = Charset('not a charset')
2377 self.assertEqual(charset.get_body_encoding(), 'base64')
2378
2379 def test_get_body_encoding_with_uppercase_charset(self):
2380 eq = self.assertEqual
2381 msg = Message()
2382 msg['Content-Type'] = 'text/plain; charset=UTF-8'
2383 eq(msg['content-type'], 'text/plain; charset=UTF-8')
2384 charsets = msg.get_charsets()
2385 eq(len(charsets), 1)
2386 eq(charsets[0], 'utf-8')
2387 charset = Charset(charsets[0])
2388 eq(charset.get_body_encoding(), 'base64')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00002389 msg.set_payload(b'hello world', charset=charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002390 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
2391 eq(msg.get_payload(decode=True), b'hello world')
2392 eq(msg['content-transfer-encoding'], 'base64')
2393 # Try another one
2394 msg = Message()
2395 msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
2396 charsets = msg.get_charsets()
2397 eq(len(charsets), 1)
2398 eq(charsets[0], 'us-ascii')
2399 charset = Charset(charsets[0])
2400 eq(charset.get_body_encoding(), encoders.encode_7or8bit)
2401 msg.set_payload('hello world', charset=charset)
2402 eq(msg.get_payload(), 'hello world')
2403 eq(msg['content-transfer-encoding'], '7bit')
2404
2405 def test_charsets_case_insensitive(self):
2406 lc = Charset('us-ascii')
2407 uc = Charset('US-ASCII')
2408 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
2409
2410 def test_partial_falls_inside_message_delivery_status(self):
2411 eq = self.ndiffAssertEqual
2412 # The Parser interface provides chunks of data to FeedParser in 8192
2413 # byte gulps. SF bug #1076485 found one of those chunks inside
2414 # message/delivery-status header block, which triggered an
2415 # unreadline() of NeedMoreData.
2416 msg = self._msgobj('msg_43.txt')
2417 sfp = StringIO()
2418 iterators._structure(msg, sfp)
2419 eq(sfp.getvalue(), """\
2420multipart/report
2421 text/plain
2422 message/delivery-status
2423 text/plain
2424 text/plain
2425 text/plain
2426 text/plain
2427 text/plain
2428 text/plain
2429 text/plain
2430 text/plain
2431 text/plain
2432 text/plain
2433 text/plain
2434 text/plain
2435 text/plain
2436 text/plain
2437 text/plain
2438 text/plain
2439 text/plain
2440 text/plain
2441 text/plain
2442 text/plain
2443 text/plain
2444 text/plain
2445 text/plain
2446 text/plain
2447 text/plain
2448 text/plain
2449 text/rfc822-headers
2450""")
2451
2452
2453
2454# Test the iterator/generators
class TestIterators(TestEmailBase):
    # Tests for the helpers in email.iterators and for the line buffering
    # done by the feed parser's BufferedSubFile.

    def test_body_line_iterator(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # First a simple non-multipart message
        msg = self._msgobj('msg_01.txt')
        it = iterators.body_line_iterator(msg)
        lines = list(it)
        eq(len(lines), 6)
        neq(EMPTYSTRING.join(lines), msg.get_payload())
        # Now a more complicated multipart
        msg = self._msgobj('msg_02.txt')
        it = iterators.body_line_iterator(msg)
        lines = list(it)
        eq(len(lines), 43)
        with openfile('msg_19.txt') as fp:
            neq(EMPTYSTRING.join(lines), fp.read())

    def test_typed_subpart_iterator(self):
        # Collect the payload of every 'text' subpart of msg_04.txt.
        eq = self.assertEqual
        msg = self._msgobj('msg_04.txt')
        it = iterators.typed_subpart_iterator(msg, 'text')
        lines = []
        subparts = 0
        for subpart in it:
            subparts += 1
            lines.append(subpart.get_payload())
        eq(subparts, 2)
        eq(EMPTYSTRING.join(lines), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        # msg_03.txt must be yielded once when asking for text/plain.
        eq = self.assertEqual
        msg = self._msgobj('msg_03.txt')
        it = iterators.typed_subpart_iterator(msg, 'text', 'plain')
        lines = []
        subparts = 0
        for subpart in it:
            subparts += 1
            lines.append(subpart.get_payload())
        eq(subparts, 1)
        eq(EMPTYSTRING.join(lines), """\

Hi,

Do you like this message?

-Me
""")

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
        line endings.  A CR ending one push() followed by a LF starting
        the next push() added an empty line.
        '''
        # Each entry is (text to push, number of lines expected to become
        # readable right after that push).
        imt = [
            ("a\r \n", 2),
            ("b", 0),
            ("c\n", 1),
            ("", 0),
            ("d\r\n", 1),
            ("e\r", 0),
            ("\nf", 1),
            ("\r\n", 1),
            ]
        from email.feedparser import BufferedSubFile, NeedMoreData
        bsf = BufferedSubFile()
        om = []
        nt = 0
        for il, n in imt:
            bsf.push(il)
            nt += n
            n1 = 0
            while True:
                ol = bsf.readline()
                # NeedMoreData is a sentinel object, so test identity.
                if ol is NeedMoreData:
                    break
                om.append(ol)
                n1 += 1
            # assertEqual reports both values when the counts disagree.
            self.assertEqual(n, n1)
        self.assertEqual(len(om), nt)
        self.assertEqual(''.join(il for il, n in imt), ''.join(om))
2541
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002542
2543
class TestParsers(TestEmailBase):
    """Parser-level tests: header-only parsing, whitespace continuation
    lines, CRLF handling and RFC 2822 header-name syntax."""

    # Always show complete diffs when a flattened message differs.
    maxDiff = None

    def test_header_parser(self):
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt') as fp:
            msg = HeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        # The body is left unparsed, so the payload is one string even
        # though the content type is multipart.
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)

    def test_whitespace_continuation(self):
        eq = self.assertEqual
        # This message contains a line after the Subject: header that has only
        # whitespace, but it is not empty!
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: the next line has a space on it
\x20
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        eq = self.assertEqual
        # Like the previous test, but the subject line is the last
        # header.
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam
Subject: the next line has a space on it
\x20

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        eq = self.assertEqual
        # newline='\n' keeps the file's CRLF line endings intact on read.
        with openfile('msg_26.txt', newline='\n') as fp:
            msg = Parser().parse(fp)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'text/plain')
        eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'application/riscos')

    def test_crlf_flatten(self):
        # Using newline='\n' preserves the crlfs in this input file.
        with openfile('msg_26.txt', newline='\n') as fp:
            text = fp.read()
        msg = email.message_from_string(text)
        out = StringIO()
        Generator(out).flatten(msg, linesep='\r\n')
        self.assertEqual(out.getvalue(), text)

    def test_multipart_digest_with_extra_mime_headers(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        eq(msg.is_multipart(), 1)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'message/rfc822')
        eq(part1.is_multipart(), 1)
        eq(len(part1.get_payload()), 1)
        part1a = part1.get_payload(0)
        eq(part1a.is_multipart(), 0)
        eq(part1a.get_content_type(), 'text/plain')
        neq(part1a.get_payload(), 'message 1\n')
        # next message/rfc822
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'message/rfc822')
        eq(part2.is_multipart(), 1)
        eq(len(part2.get_payload()), 1)
        part2a = part2.get_payload(0)
        eq(part2a.is_multipart(), 0)
        eq(part2a.get_content_type(), 'text/plain')
        neq(part2a.get_payload(), 'message 2\n')

    def test_three_lines(self):
        # A bug report by Andrew McNamara
        lines = ['From: Andrew Person <aperson@dom.ain',
                 'Subject: Test',
                 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
        msg = email.message_from_string(NL.join(lines))
        self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        eq = self.assertEqual
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        value1 = 'text'
        value2 = 'more text'
        m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            value1, value2)
        msg = email.message_from_string(m)
        eq(msg.get('Header'), value1)
        eq(msg.get('Next-Header'), value2)

    def test_rfc2822_header_syntax(self):
        eq = self.assertEqual
        m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg), 3)
        eq(sorted(field for field in msg), ['!"#QUX;~', '>From', 'From'])
        eq(msg.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        eq = self.assertEqual
        # The first line has a space before any colon, so it is not a
        # header line and no headers are recorded at all.
        m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg.keys()), 0)

    def test_rfc2822_one_character_header(self):
        eq = self.assertEqual
        m = 'A: first header\nB: second header\nCC: third header\n\nbody'
        msg = email.message_from_string(m)
        # sorted() works on whatever iterable keys() returns; it does not
        # require a mutable list the way an in-place .sort() does.
        headers = sorted(msg.keys())
        eq(headers, ['A', 'B', 'CC'])
        eq(msg.get_payload(), 'body')

    def test_CRLFLF_at_end_of_part(self):
        # issue 5610: feedparser should not eat two chars from body part ending
        # with "\r\n\n".
        m = (
            "From: foo@bar.com\n"
            "To: baz\n"
            "Mime-Version: 1.0\n"
            "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
            "\n"
            "--BOUNDARY\n"
            "Content-Type: text/plain\n"
            "\n"
            "body ending with CRLF newline\r\n"
            "\n"
            "--BOUNDARY--\n"
        )
        msg = email.message_from_string(m)
        self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002706
2707
class Test8BitBytesHandling(unittest.TestCase):
    # In Python3 all input is string, but that doesn't work if the actual input
    # uses an 8bit transfer encoding.  To hack around that, in email 5.1 we
    # decode byte streams using the surrogateescape error handler, and
    # reconvert to binary at appropriate places if we detect surrogates.  This
    # doesn't allow us to transform headers with 8bit bytes (they get munged),
    # but it does allow us to parse and preserve them, and to decode body
    # parts that use an 8bit CTE.

    # Show complete diffs when whole messages are compared.
    maxDiff = None

    # Template for the body tests; charset, cte and bodyline are filled in
    # with str.format() by each test.
    bodytest_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: baz
        Mime-Version: 1.0
        Content-Type: text/plain; charset={charset}
        Content-Transfer-Encoding: {cte}

        {bodyline}
        """)

    def test_known_8bit_CTE(self):
        text = self.bodytest_msg.format(charset='utf-8',
                                        cte='8bit',
                                        bodyline='pöstal')
        msg = email.message_from_bytes(text.encode('utf-8'))
        self.assertEqual(msg.get_payload(), "pöstal\n")
        self.assertEqual(msg.get_payload(decode=True),
                         "pöstal\n".encode('utf-8'))

    def test_unknown_8bit_CTE(self):
        text = self.bodytest_msg.format(charset='notavalidcharset',
                                        cte='8bit',
                                        bodyline='pöstal')
        msg = email.message_from_bytes(text.encode('utf-8'))
        self.assertEqual(msg.get_payload(), "p��stal\n")
        self.assertEqual(msg.get_payload(decode=True),
                         "pöstal\n".encode('utf-8'))

    def test_8bit_in_quopri_body(self):
        # The input is not RFC compliant.  Without 'decode' the library
        # decodes the body with the charset named in the headers, which
        # happens to work because the source bytes really are utf-8.  On
        # real dirty data that guess would likely produce mojibake, but the
        # data is invalid anyway so it is as good a guess as any.  That half
        # of the test therefore only pins the current behavior, which is not
        # necessarily the best possible behavior.  With 'decode' the raw
        # bytes come back, so that half should be testing correct behavior,
        # or at least the same result email4 produced.
        text = self.bodytest_msg.format(charset='utf-8',
                                        cte='quoted-printable',
                                        bodyline='p=C3=B6stál')
        msg = email.message_from_bytes(text.encode('utf-8'))
        self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
        self.assertEqual(msg.get_payload(decode=True),
                         'pöstál\n'.encode('utf-8'))

    def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
        # Like the previous test, but shows that an 8bit byte which cannot
        # be decoded in the declared charset is replaced by the unicode
        # 'unknown' character.  Again, this may or may not be the ideal
        # behavior.  With decode=False none of the decoders get involved,
        # so this is the only test needed for this behavior.
        text = self.bodytest_msg.format(charset='ascii',
                                        cte='quoted-printable',
                                        bodyline='p=C3=B6stál')
        msg = email.message_from_bytes(text.encode('utf-8'))
        self.assertEqual(msg.get_payload(), 'p=C3=B6st��l\n')
        self.assertEqual(msg.get_payload(decode=True),
                         'pöstál\n'.encode('utf-8'))

    def test_8bit_in_base64_body(self):
        # Sticking an 8bit byte in a base64 block makes it undecodable by
        # normal means, so the block is returned undecoded, but as bytes.
        text = self.bodytest_msg.format(charset='utf-8',
                                        cte='base64',
                                        bodyline='cMO2c3RhbAá=')
        msg = email.message_from_bytes(text.encode('utf-8'))
        self.assertEqual(msg.get_payload(decode=True),
                         'cMO2c3RhbAá=\n'.encode('utf-8'))

    def test_8bit_in_uuencode_body(self):
        # Sticking an 8bit byte in a uuencode block makes it undecodable by
        # normal means, so the block is returned undecoded, but as bytes.
        text = self.bodytest_msg.format(charset='utf-8',
                                        cte='uuencode',
                                        bodyline='<,.V<W1A; á ')
        msg = email.message_from_bytes(text.encode('utf-8'))
        self.assertEqual(msg.get_payload(decode=True),
                         '<,.V<W1A; á \n'.encode('utf-8'))

    # Message with raw 8bit bytes in several headers and two From headers.
    headertest_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: báz
        Subject: Maintenant je vous présente mon collègue, le pouf célèbre
        \tJean de Baddie
        From: göst

        Yes, they are flying.
        """).encode('utf-8')

    def test_get_8bit_header(self):
        msg = email.message_from_bytes(self.headertest_msg)
        # Each non-ASCII byte of the header comes back as a '?'.
        self.assertEqual(msg.get('to'), 'b??z')
        self.assertEqual(msg['to'], 'b??z')

    def test_print_8bit_headers(self):
        msg = email.message_from_bytes(self.headertest_msg)
        expected = self.headertest_msg.decode('ascii', 'replace')
        expected = expected.replace('�', '?')
        self.assertEqual(str(msg), expected)

    def test_values_with_8bit_headers(self):
        msg = email.message_from_bytes(self.headertest_msg)
        expected = [
            'foo@bar.com',
            'b??z',
            'Maintenant je vous pr??sente mon '
            'coll??gue, le pouf c??l??bre\n'
            '\tJean de Baddie',
            "g??st",
        ]
        self.assertListEqual(msg.values(), expected)

    def test_items_with_8bit_headers(self):
        msg = email.message_from_bytes(self.headertest_msg)
        expected = [
            ('From', 'foo@bar.com'),
            ('To', 'b??z'),
            ('Subject', 'Maintenant je vous pr??sente mon '
                        'coll??gue, le pouf c??l??bre\n'
                        '\tJean de Baddie'),
            ('From', 'g??st'),
        ]
        self.assertListEqual(msg.items(), expected)

    def test_get_all_with_8bit_headers(self):
        msg = email.message_from_bytes(self.headertest_msg)
        self.assertListEqual(msg.get_all('from'),
                             ['foo@bar.com', 'g??st'])

    # utf-8 message with 8bit bytes in both the headers and the body.
    non_latin_bin_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: báz
        Subject: Maintenant je vous présente mon collègue, le pouf célèbre
        \tJean de Baddie
        Mime-Version: 1.0
        Content-Type: text/plain; charset="utf-8"
        Content-Transfer-Encoding: 8bit

        Да, они летят.
        """).encode('utf-8')

    def test_bytes_generator(self):
        # BytesGenerator must reproduce the binary input exactly.
        msg = email.message_from_bytes(self.non_latin_bin_msg)
        sink = BytesIO()
        email.generator.BytesGenerator(sink).flatten(msg)
        self.assertEqual(sink.getvalue(), self.non_latin_bin_msg)

    # XXX: ultimately the '?' should turn into CTE encoded bytes
    # using 'unknown-8bit' charset.
    non_latin_bin_msg_as7bit = textwrap.dedent("""\
        From: foo@bar.com
        To: b??z
        Subject: Maintenant je vous pr??sente mon coll??gue, le pouf c??l??bre
        \tJean de Baddie
        Mime-Version: 1.0
        Content-Type: text/plain; charset="utf-8"
        Content-Transfer-Encoding: base64

        0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
        """)

    def test_generator_handles_8bit(self):
        msg = email.message_from_bytes(self.non_latin_bin_msg)
        sink = StringIO()
        email.generator.Generator(sink).flatten(msg)
        self.assertEqual(sink.getvalue(), self.non_latin_bin_msg_as7bit)

    def test_bytes_generator_with_unix_from(self):
        # The unixfrom contains a current date, so we can't check it
        # literally.  Just make sure the first word is 'From' and the
        # rest of the message matches the input.
        msg = email.message_from_bytes(self.non_latin_bin_msg)
        sink = BytesIO()
        email.generator.BytesGenerator(sink).flatten(msg, unixfrom=True)
        envelope, *rest = sink.getvalue().split(b'\n')
        self.assertEqual(envelope.split()[0], b'From')
        self.assertEqual(b'\n'.join(rest), self.non_latin_bin_msg)

    def test_message_from_binary_file(self):
        fn = 'test.msg'
        # Register the cleanup before the file is created.
        self.addCleanup(unlink, fn)
        with open(fn, 'wb') as testfile:
            testfile.write(self.non_latin_bin_msg)
        with open(fn, 'rb') as testfile:
            parsed = email.parser.BytesParser().parse(testfile)
        self.assertEqual(str(parsed), self.non_latin_bin_msg_as7bit)

    # latin-1 message whose body uses an 8bit transfer encoding.
    latin_bin_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: Dinsdale
        Subject: Nudge nudge, wink, wink
        Mime-Version: 1.0
        Content-Type: text/plain; charset="latin-1"
        Content-Transfer-Encoding: 8bit

        oh là là, know what I mean, know what I mean?
        """).encode('latin-1')

    # What str() of the parsed latin_bin_msg is expected to produce.
    latin_bin_msg_as7bit = textwrap.dedent("""\
        From: foo@bar.com
        To: Dinsdale
        Subject: Nudge nudge, wink, wink
        Mime-Version: 1.0
        Content-Type: text/plain; charset="iso-8859-1"
        Content-Transfer-Encoding: quoted-printable

        oh l=E0 l=E0, know what I mean, know what I mean?
        """)

    def test_string_generator_reencodes_to_quopri_when_appropriate(self):
        parsed = email.message_from_bytes(self.latin_bin_msg)
        self.assertEqual(str(parsed), self.latin_bin_msg_as7bit)

    def test_decoded_generator_emits_unicode_body(self):
        parsed = email.message_from_bytes(self.latin_bin_msg)
        sink = StringIO()
        email.generator.DecodedGenerator(sink).flatten(parsed)
        # The DecodedGenerator output contains an extra blank line compared
        # to the input message.  RDM: not sure if this is a bug or not,
        # but it is not specific to the 8bit->7bit conversion.
        expected = self.latin_bin_msg.decode('latin-1') + '\n'
        self.assertEqual(sink.getvalue(), expected)

    def test_bytes_feedparser(self):
        # Feed the message ten bytes at a time.
        bfp = email.feedparser.BytesFeedParser()
        data = self.latin_bin_msg
        for start in range(0, len(data), 10):
            bfp.feed(data[start:start + 10])
        parsed = bfp.close()
        self.assertEqual(str(parsed), self.latin_bin_msg_as7bit)

    def test_crlf_flatten(self):
        # Binary mode keeps the CRLF line endings of the input file.
        with openfile('msg_26.txt', 'rb') as fp:
            text = fp.read()
        msg = email.message_from_bytes(text)
        sink = BytesIO()
        email.generator.BytesGenerator(sink).flatten(msg, linesep='\r\n')
        self.assertEqual(sink.getvalue(), text)
2956
R. David Murray96fd54e2010-10-08 15:55:28 +00002957
class TestBytesGeneratorIdempotent(TestIdempotent):
    """TestIdempotent variant that reads the data files as bytes and
    flattens the parsed message with BytesGenerator."""

    maxDiff = None

    def _msgobj(self, filename):
        # Return the parsed message together with the raw bytes it came from.
        with openfile(filename, 'rb') as fp:
            raw = fp.read()
        return email.message_from_bytes(raw), raw

    def _idempotent(self, msg, data):
        # Choose the line separator from the byte just before the first
        # newline in the data; 13 is the byte value of b'\r'.
        first_newline = data.index(b'\n')
        if data[first_newline - 1] == 13:
            linesep = '\r\n'
        else:
            linesep = '\n'
        sink = BytesIO()
        generator = email.generator.BytesGenerator(sink, maxheaderlen=0)
        generator.flatten(msg, linesep=linesep)
        self.assertByteStringsEqual(data, sink.getvalue())

    def assertByteStringsEqual(self, str1, str2):
        # Compare as lists of lines so that a failure reports lines.
        expected_lines = str1.split(b'\n')
        actual_lines = str2.split(b'\n')
        self.assertListEqual(expected_lines, actual_lines)
2978
2979
2980
class TestBase64(unittest.TestCase):
    """Tests for the helper functions in email.base64mime."""

    def test_len(self):
        check = self.assertEqual
        check(base64mime.header_length('hello'),
              len(base64mime.body_encode(b'hello', eol='')))
        # (largest input size, expected encoded length); sizes above the
        # last limit are expected to encode to 20 characters.
        limits = ((0, 0), (3, 4), (6, 8), (9, 12), (12, 16))
        for size in range(15):
            bsize = next((encoded for limit, encoded in limits
                          if size <= limit), 20)
            check(base64mime.header_length('x' * size), bsize)

    def test_decode(self):
        check = self.assertEqual
        check(base64mime.decode(''), b'')
        check(base64mime.decode('aGVsbG8='), b'hello')

    def test_encode(self):
        check = self.assertEqual
        encode = base64mime.body_encode
        check(encode(b''), b'')
        check(encode(b'hello'), 'aGVsbG8=\n')
        # Test the binary flag
        check(encode(b'hello\n'), 'aGVsbG8K\n')
        # Test the maxlinelen arg
        check(encode(b'xxxx ' * 20, maxlinelen=40), """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IA==
""")
        # Test the eol argument
        check(encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'),
              """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IA==\r
""")

    def test_header_encode(self):
        check = self.assertEqual
        encode = base64mime.header_encode
        check(encode('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        check(encode('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        check(encode('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # Test the charset option
        check(encode('hello', charset='iso-8859-2'),
              '=?iso-8859-2?b?aGVsbG8=?=')
        check(encode('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003031
3032
3033
class TestQuopri(unittest.TestCase):
    """Tests for the quoted-printable helpers in email.quoprimime."""

    def setUp(self):
        # Set of characters (as byte integers) that don't need to be encoded
        # in headers.
        self.hlit = list(chain(
            range(ord('a'), ord('z') + 1),
            range(ord('A'), ord('Z') + 1),
            range(ord('0'), ord('9') + 1),
            b'!*+-/'))
        # Set of characters (as byte integers) that do need to be encoded in
        # headers.
        self.hnon = [c for c in range(256) if c not in self.hlit]
        assert len(self.hlit) + len(self.hnon) == 256
        # Set of characters (as byte integers) that don't need to be encoded
        # in bodies: the range from space through '~' without '=', plus tab.
        printable = range(ord(' '), ord('~') + 1)
        self.blit = [c for c in printable if c != ord('=')] + [ord('\t')]
        # Set of characters (as byte integers) that do need to be encoded in
        # bodies.
        self.bnon = [c for c in range(256) if c not in self.blit]
        assert len(self.blit) + len(self.bnon) == 256

    def test_quopri_header_check(self):
        needs_encoding = quoprimime.header_check
        for c in self.hlit:
            self.assertFalse(
                needs_encoding(c),
                'Should not be header quopri encoded: %s' % chr(c))
        for c in self.hnon:
            self.assertTrue(
                needs_encoding(c),
                'Should be header quopri encoded: %s' % chr(c))

    def test_quopri_body_check(self):
        needs_encoding = quoprimime.body_check
        for c in self.blit:
            self.assertFalse(
                needs_encoding(c),
                'Should not be body quopri encoded: %s' % chr(c))
        for c in self.bnon:
            self.assertTrue(
                needs_encoding(c),
                'Should be body quopri encoded: %s' % chr(c))

    def test_header_quopri_len(self):
        check = self.assertEqual
        length = quoprimime.header_length
        for raw, expected in ((b'hello', 5), (b'h@e@l@l@o@', 20)):
            check(length(raw), expected)
            # RFC 2047 chrome is not included in header_length();
            # =?xxx?q?...?= means 10 extra characters.
            check(len(quoprimime.header_encode(raw, charset='xxx')),
                  length(raw) + 10)
        for c in self.hlit:
            check(length(bytes([c])), 1,
                  'expected length 1 for %r' % chr(c))
        for c in self.hnon:
            # Space is special; it's encoded to _
            if c != ord(' '):
                check(length(bytes([c])), 3,
                      'expected length 3 for %r' % chr(c))
        check(length(b' '), 1)

    def test_body_quopri_len(self):
        check = self.assertEqual
        length = quoprimime.body_length
        for c in self.blit:
            check(length(bytes([c])), 1)
        for c in self.bnon:
            check(length(bytes([c])), 3)

    def test_quote_unquote_idempotent(self):
        # quote() followed by unquote() must give back every character.
        for code in range(256):
            char = chr(code)
            round_tripped = quoprimime.unquote(quoprimime.quote(char))
            self.assertEqual(round_tripped, char)

    def test_header_encode(self):
        check = self.assertEqual
        encode = quoprimime.header_encode
        check(encode(b'hello'), '=?iso-8859-1?q?hello?=')
        check(encode(b'hello', charset='iso-8859-2'),
              '=?iso-8859-2?q?hello?=')
        check(encode(b'hello\nworld'), '=?iso-8859-1?q?hello=0Aworld?=')
        # Test a non-ASCII character
        check(encode(b'hello\xc7there'), '=?iso-8859-1?q?hello=C7there?=')

    def test_decode(self):
        check = self.assertEqual
        decode = quoprimime.decode
        check(decode(''), '')
        check(decode('hello'), 'hello')
        check(decode('hello', 'X'), 'hello')
        check(decode('hello\nworld', 'X'), 'helloXworld')

    def test_encode(self):
        check = self.assertEqual
        encode = quoprimime.body_encode
        check(encode(''), '')
        check(encode('hello'), 'hello')
        # Test the binary flag
        check(encode('hello\r\nworld'), 'hello\nworld')
        # Test the maxlinelen arg
        check(encode('xxxx ' * 20, maxlinelen=40), """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
x xxxx xxxx xxxx xxxx=20""")
        # Test the eol argument
        check(encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
              """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
x xxxx xxxx xxxx xxxx=20""")
        check(encode("""\
one line

two line"""), """\
one line

two line""")
3150
3151
3152
3153# Test the Charset class
3154class TestCharset(unittest.TestCase):
3155 def tearDown(self):
3156 from email import charset as CharsetModule
3157 try:
3158 del CharsetModule.CHARSETS['fake']
3159 except KeyError:
3160 pass
3161
Guido van Rossum9604e662007-08-30 03:46:43 +00003162 def test_codec_encodeable(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003163 eq = self.assertEqual
3164 # Make sure us-ascii = no Unicode conversion
3165 c = Charset('us-ascii')
Guido van Rossum9604e662007-08-30 03:46:43 +00003166 eq(c.header_encode('Hello World!'), 'Hello World!')
3167 # Test 8-bit idempotency with us-ascii
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003168 s = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
Guido van Rossum9604e662007-08-30 03:46:43 +00003169 self.assertRaises(UnicodeError, c.header_encode, s)
3170 c = Charset('utf-8')
3171 eq(c.header_encode(s), '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003172
3173 def test_body_encode(self):
3174 eq = self.assertEqual
3175 # Try a charset with QP body encoding
3176 c = Charset('iso-8859-1')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00003177 eq('hello w=F6rld', c.body_encode('hello w\xf6rld'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003178 # Try a charset with Base64 body encoding
3179 c = Charset('utf-8')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00003180 eq('aGVsbG8gd29ybGQ=\n', c.body_encode(b'hello world'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003181 # Try a charset with None body encoding
3182 c = Charset('us-ascii')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00003183 eq('hello world', c.body_encode('hello world'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003184 # Try the convert argument, where input codec != output codec
3185 c = Charset('euc-jp')
3186 # With apologies to Tokio Kikuchi ;)
Barry Warsawbef9d212007-08-31 10:55:37 +00003187 # XXX FIXME
3188## try:
3189## eq('\x1b$B5FCO;~IW\x1b(B',
3190## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
3191## eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
3192## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
3193## except LookupError:
3194## # We probably don't have the Japanese codecs installed
3195## pass
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003196 # Testing SF bug #625509, which we have to fake, since there are no
3197 # built-in encodings where the header encoding is QP but the body
3198 # encoding is not.
3199 from email import charset as CharsetModule
3200 CharsetModule.add_charset('fake', CharsetModule.QP, None)
3201 c = Charset('fake')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00003202 eq('hello w\xf6rld', c.body_encode('hello w\xf6rld'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003203
3204 def test_unicode_charset_name(self):
3205 charset = Charset('us-ascii')
3206 self.assertEqual(str(charset), 'us-ascii')
3207 self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
3208
3209
3210
3211# Test multilingual MIME headers.
class TestHeader(TestEmailBase):
    """Tests for Header encoding/folding, decode_header() and make_header()."""

    def test_simple(self):
        # Header puts a single space between appended chunks (see
        # test_simple_surprise), so a chunk that brings its own leading space
        # ends up separated from the previous chunk by two spaces.
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        h.append(' Goodbye World!')
        eq(h.encode(), 'Hello World!  Goodbye World!')

    def test_simple_surprise(self):
        # Appending a chunk without a leading space still yields a space
        # between the two chunks in the encoded value.
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        h.append('Goodbye World!')
        eq(h.encode(), 'Hello World! Goodbye World!')

    def test_header_needs_no_decoding(self):
        # Plain text comes back from decode_header() as one chunk with no
        # charset.
        h = 'no decoding needed'
        self.assertEqual(decode_header(h), [(h, None)])

    def test_long(self):
        # Each folded line (split on the newline + continuation space) must
        # be no longer than the requested maxlinelen.
        h = Header("I am the very model of a modern Major-General; "
                   "I've information vegetable, animal, and mineral; "
                   "I know the kings of England, and I quote the fights "
                   "historical from Marathon to Waterloo, in order "
                   "categorical; I'm very well acquainted, too, with matters "
                   "mathematical; I understand equations, both the simple "
                   "and quadratical; about binomial theorem I'm teeming with "
                   "a lot o' news, with many cheerful facts about the square "
                   "of the hypotenuse.",
                   maxlinelen=76)
        for line in h.encode(splitchars=' ').split('\n '):
            # assertLessEqual reports the offending length on failure.
            self.assertLessEqual(len(line), 76)

    def test_multilingual(self):
        # One header built from three chunks in three charsets: check the
        # folded RFC 2047 form, the decode_header() round trip, str(), and
        # make_header().
        eq = self.ndiffAssertEqual
        g = Charset("iso-8859-1")
        cz = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        g_head = (b'Die Mieter treten hier ein werden mit einem '
                  b'Foerderband komfortabel den Korridor entlang, '
                  b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
                  b'gegen die rotierenden Klingen bef\xf6rdert. ')
        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        h = Header(g_head, g)
        h.append(cz_head, cz)
        h.append(utf8_head, utf8)
        enc = h.encode(maxlinelen=76)
        eq(enc, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
        decoded = decode_header(enc)
        eq(len(decoded), 3)
        eq(decoded[0], (g_head, 'iso-8859-1'))
        eq(decoded[1], (cz_head, 'iso-8859-2'))
        eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
        ustr = str(h)
        eq(ustr,
           (b'Die Mieter treten hier ein werden mit einem Foerderband '
            b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
            b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
            b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
            b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
            b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
            b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
            b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
            b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
            b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
            b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
            b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
            b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
            b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
            b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
            b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
            ).decode('utf-8'))
        # Test make_header()
        newh = make_header(decode_header(enc))
        eq(newh, h)

    def test_empty_header_encode(self):
        # A Header with no chunks encodes to the empty string.
        h = Header()
        self.assertEqual(h.encode(), '')

    def test_header_ctor_default_args(self):
        # A default-constructed Header compares equal to '' and, after an
        # append, to the appended text.
        eq = self.ndiffAssertEqual
        h = Header()
        eq(h, '')
        h.append('foo', Charset('iso-8859-1'))
        eq(h, 'foo')

    def test_explicit_maxlinelen(self):
        # The fold point moves when a header_name is supplied, and a large
        # explicit maxlinelen suppresses folding entirely.
        eq = self.ndiffAssertEqual
        hstr = ('A very long line that must get split to something other '
                'than at the 76th character boundary to test the non-default '
                'behavior')
        h = Header(hstr)
        eq(h.encode(), '''\
A very long line that must get split to something other than at the 76th
 character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        h = Header(hstr, header_name='Subject')
        eq(h.encode(), '''\
A very long line that must get split to something other than at the
 76th character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        h = Header(hstr, maxlinelen=1024, header_name='Subject')
        eq(h.encode(), hstr)
        eq(str(h), hstr)

    def test_quopri_splittable(self):
        # Quoted-printable encoded words are split to honour small
        # maxlinelen values and still decode back to the original text.
        eq = self.ndiffAssertEqual
        h = Header(charset='iso-8859-1', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_?=""")
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='iso-8859-1', maxlinelen=40)
        h.append('xxxx ' * 20)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
 =?iso-8859-1?q?_xxxx_xxxx_?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_base64_splittable(self):
        # Same as test_quopri_splittable, but for a charset whose header
        # encoding is base64.
        eq = self.ndiffAssertEqual
        h = Header(charset='koi8-r', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IA==?=""")
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='koi8-r', maxlinelen=40)
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_us_ascii_header(self):
        # Plain ASCII survives decode_header() -> make_header() -> encode().
        eq = self.assertEqual
        s = 'hello'
        x = decode_header(s)
        eq(x, [('hello', None)])
        h = make_header(x)
        eq(s, h.encode())

    def test_string_charset(self):
        # append() accepts the charset as a plain string name.
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        eq(h, 'hello')

##    def test_unicode_error(self):
##        raises = self.assertRaises
##        raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
##        h = Header()
##        raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')

    def test_utf8_shortest(self):
        # For utf-8 the encoded word uses 'q' for the first value and 'b'
        # for the second.
        eq = self.assertEqual
        h = Header('p\xf6stal', 'utf-8')
        eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
        h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
        eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')

    def test_bad_8bit_header(self):
        # Undecodable bytes raise UnicodeError unless errors='replace' is
        # given, in which case the replacement text is used.
        raises = self.assertRaises
        eq = self.assertEqual
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        raises(UnicodeError, Header, x)
        h = Header()
        raises(UnicodeError, h.append, x)
        e = x.decode('utf-8', 'replace')
        eq(str(Header(x, errors='replace')), e)
        h.append(x, errors='replace')
        eq(str(h), e)

    def test_encoded_adjacent_nonencoded(self):
        # An encoded chunk followed by a plain chunk round-trips unchanged.
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        h.append('world')
        s = h.encode()
        eq(s, '=?iso-8859-1?q?hello?= world')
        h = make_header(decode_header(s))
        eq(h.encode(), s)

    def test_whitespace_eater(self):
        # Adjacent encoded words in the same charset are merged into one
        # decoded chunk; the whitespace between them is dropped.
        eq = self.assertEqual
        s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
        parts = decode_header(s)
        eq(parts, [(b'Subject:', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b'zz.', None)])
        hdr = make_header(parts)
        eq(hdr.encode(),
           'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')

    def test_broken_base64_header(self):
        # A malformed base64 payload makes decode_header() raise
        # HeaderParseError.
        raises = self.assertRaises
        s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
        raises(errors.HeaderParseError, decode_header, s)
3518
3519
3520
3521# Test RFC 2231 header parameters (en/de)coding
class TestRFC2231(TestEmailBase):
    """Tests for RFC 2231 parameter handling on Message objects."""

    def test_get_param(self):
        # An RFC 2231 parameter comes back as (charset, language, value);
        # unquote=False keeps the surrounding double quotes.
        msg = self._msgobj('msg_29.txt')
        self.assertEqual(
            msg.get_param('title'),
            ('us-ascii', 'en', "This is even more ***fun*** isn't it!"))
        self.assertEqual(
            msg.get_param('title', unquote=False),
            ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))

    def test_set_param(self):
        # set_param() with a charset stores a 3-tuple value; language
        # defaults to '' and the flattened header uses the title*= form.
        title = "This is even more ***fun*** isn't it!"
        msg = Message()
        msg.set_param('title', title, charset='us-ascii')
        self.ndiffAssertEqual(msg.get_param('title'),
                              ('us-ascii', '', title))
        msg.set_param('title', title, charset='us-ascii', language='en')
        self.ndiffAssertEqual(msg.get_param('title'),
                              ('us-ascii', 'en', title))
        msg = self._msgobj('msg_01.txt')
        msg.set_param('title', title, charset='us-ascii', language='en')
        # NOTE(review): this expected text is whitespace-sensitive; confirm
        # the spacing inside the Received date against data/msg_01.txt.
        self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*="us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21"


Hi,

Do you like this message?

-Me
""")

    def test_del_param(self):
        # Deleting one parameter leaves the remaining ones in place; the
        # expected output here has the charset value quoted.
        title = "This is even more ***fun*** isn't it!"
        msg = self._msgobj('msg_01.txt')
        msg.set_param('foo', 'bar', charset='us-ascii', language='en')
        msg.set_param('title', title, charset='us-ascii', language='en')
        msg.del_param('foo', header='Content-Type')
        # NOTE(review): this expected text is whitespace-sensitive; confirm
        # the spacing inside the Received date against data/msg_01.txt.
        self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*="us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21"


Hi,

Do you like this message?

-Me
""")

    def test_rfc2231_get_content_charset(self):
        msg = self._msgobj('msg_32.txt')
        self.assertEqual(msg.get_content_charset(), 'us-ascii')

    def test_rfc2231_no_language_or_charset(self):
        # Plain (unencoded) continuations are joined into an ordinary string,
        # not a (charset, language, value) tuple.
        text = '''\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm

'''
        msg = email.message_from_string(text)
        param = msg.get_param('NAME')
        self.assertFalse(isinstance(param, tuple))
        self.assertEqual(
            param,
            'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')

    def test_rfc2231_no_language_or_charset_in_filename(self):
        # Empty charset and language fields ('') still allow the encoded
        # segments to be percent-decoded.
        text = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(text)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
        # Same input and expectation as
        # test_rfc2231_no_language_or_charset_in_filename.
        text = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(text)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_partly_encoded(self):
        # Segment 0 has no trailing '*', so its percent escapes are kept
        # literally while segment 1 is decoded.
        text = '''\
Content-Disposition: inline;
\tfilename*0="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(text)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20***fun*** is it not.pdf')

    def test_rfc2231_partly_nonencoded(self):
        # No segment is marked as encoded, so every percent escape is kept.
        text = '''\
Content-Disposition: inline;
\tfilename*0="This%20is%20even%20more%20";
\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(text)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_boundary(self):
        text = '''\
Content-Type: multipart/alternative;
\tboundary*0*="''This%20is%20even%20more%20";
\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tboundary*2="is it not.pdf"

'''
        msg = email.message_from_string(text)
        self.assertEqual(msg.get_boundary(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_charset(self):
        # This is a nonsensical charset value, but tests the code anyway
        text = '''\
Content-Type: text/plain;
\tcharset*0*="This%20is%20even%20more%20";
\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tcharset*2="is it not.pdf"

'''
        msg = email.message_from_string(text)
        self.assertEqual(msg.get_content_charset(),
                         'this is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_filename(self):
        # An unknown charset name ('bogus') does not prevent the filename
        # from being decoded.
        text = '''\
Content-Disposition: inline;
\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(text)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_charset(self):
        text = """\
Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D

"""
        msg = email.message_from_string(text)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertEqual(msg.get_content_charset(), None)

    def test_rfc2231_bad_character_in_charset(self):
        text = """\
Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D

"""
        msg = email.message_from_string(text)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertEqual(msg.get_content_charset(), None)

    def test_rfc2231_bad_character_in_filename(self):
        # A byte that cannot be decoded in the declared charset shows up as
        # U+FFFD in the filename.
        text = '''\
Content-Disposition: inline;
\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2*="is it not.pdf%E2"

'''
        msg = email.message_from_string(text)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf\ufffd')

    def test_rfc2231_unknown_encoding(self):
        text = """\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt

"""
        msg = email.message_from_string(text)
        self.assertEqual(msg.get_filename(), 'myfile.txt')

    def test_rfc2231_single_tick_in_filename_extended(self):
        # A lone apostrophe in an extended value is not taken as a
        # charset/language delimiter: both come back as None.
        text = """\
Content-Type: application/x-foo;
\tname*0*=\"Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(text)
        charset, language, value = msg.get_param('name')
        self.assertEqual(charset, None)
        self.assertEqual(language, None)
        self.assertEqual(value, "Frank's Document")

    def test_rfc2231_single_tick_in_filename(self):
        text = """\
Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(text)
        param = msg.get_param('name')
        self.assertFalse(isinstance(param, tuple))
        self.assertEqual(param, "Frank's Document")

    def test_rfc2231_tick_attack_extended(self):
        # Only the first two apostrophes delimit charset and language; the
        # third one stays part of the value.
        text = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(text)
        charset, language, value = msg.get_param('name')
        self.assertEqual(charset, 'us-ascii')
        self.assertEqual(language, 'en-us')
        self.assertEqual(value, "Frank's Document")

    def test_rfc2231_tick_attack(self):
        # Without the extended-value marker the apostrophes are ordinary
        # characters and the whole text is returned as a plain string.
        text = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(text)
        param = msg.get_param('name')
        self.assertFalse(isinstance(param, tuple))
        self.assertEqual(param, "us-ascii'en-us'Frank's Document")

    def test_rfc2231_no_extended_values(self):
        text = """\
Content-Type: application/x-foo; name=\"Frank's Document\"

"""
        msg = email.message_from_string(text)
        self.assertEqual(msg.get_param('name'), "Frank's Document")

    def test_rfc2231_encoded_then_unencoded_segments(self):
        text = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'My\";
\tname*1=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(text)
        charset, language, value = msg.get_param('name')
        self.assertEqual(charset, 'us-ascii')
        self.assertEqual(language, 'en-us')
        self.assertEqual(value, 'My Document For You')

    def test_rfc2231_unencoded_then_encoded_segments(self):
        text = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'My\";
\tname*1*=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(text)
        charset, language, value = msg.get_param('name')
        self.assertEqual(charset, 'us-ascii')
        self.assertEqual(language, 'en-us')
        self.assertEqual(value, 'My Document For You')
3830
3831
3832
R. David Murraya8f480f2010-01-16 18:30:03 +00003833# Tests to ensure that signed parts of an email are completely preserved, as
3834# required by RFC1847 section 2.1. Note that these are incomplete, because the
3835# email package does not currently always preserve the body. See issue 1670765.
class TestSigned(TestEmailBase):
    """Check that the first MIME part of a message survives regeneration."""

    def _msg_and_obj(self, filename):
        # Return (raw text, parsed Message) for the named test data file.
        with openfile(findfile(filename)) as fp:
            original = fp.read()
            msg = email.message_from_string(original)
        return original, msg

    def _signed_parts_eq(self, original, result):
        # Compare the first MIME part of each message: the text between the
        # first '--boundary' line and the next line that is exactly the same
        # '--boundary'.
        import re
        repart = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)

        def first_part(text, label):
            # Fail with a readable message, rather than an AttributeError on
            # None, when the text holds no pair of boundary lines.
            match = repart.search(text)
            self.assertIsNotNone(
                match, 'no MIME boundary pair found in %s message' % label)
            return match.group(2)

        inpart = first_part(original, 'original')
        outpart = first_part(result, 'generated')
        self.assertEqual(outpart, inpart)

    def test_long_headers_as_string(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)
3868
3869
3870
def _testclasses():
    """Return every attribute of this module whose name starts with 'Test'."""
    module = sys.modules[__name__]
    found = []
    for attr_name in dir(module):
        if attr_name.startswith('Test'):
            found.append(getattr(module, attr_name))
    return found
3874
3875
def suite():
    """Return a TestSuite holding the tests of every class from _testclasses()."""
    tests = unittest.TestSuite()
    for testclass in _testclasses():
        # unittest.makeSuite() is deprecated (and removed in Python 3.13);
        # the default loader collects the same 'test*' methods.
        tests.addTest(
            unittest.defaultTestLoader.loadTestsFromTestCase(testclass))
    return tests
3881
3882
def test_main():
    """Run each class returned by _testclasses() through run_unittest()."""
    for case_class in _testclasses():
        run_unittest(case_class)
3886
3887
3888
# When run as a script, hand control to unittest and name the module-level
# suite() callable as the default test to run.
if __name__ == '__main__':
    unittest.main(defaultTest='suite')