blob: e5e51c6ffcb6bfd45c1796666abfd616ccc9acd9 [file] [log] [blame]
Benjamin Peterson46a99002010-01-09 18:45:30 +00001# Copyright (C) 2001-2010 Python Software Foundation
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002# Contact: email-sig@python.org
3# email package unit tests
4
5import os
6import sys
7import time
8import base64
9import difflib
10import unittest
11import warnings
R. David Murray96fd54e2010-10-08 15:55:28 +000012import textwrap
Guido van Rossum8b3febe2007-08-30 01:15:14 +000013
R. David Murray96fd54e2010-10-08 15:55:28 +000014from io import StringIO, BytesIO
Guido van Rossum8b3febe2007-08-30 01:15:14 +000015from itertools import chain
16
17import email
18
19from email.charset import Charset
20from email.header import Header, decode_header, make_header
21from email.parser import Parser, HeaderParser
22from email.generator import Generator, DecodedGenerator
23from email.message import Message
24from email.mime.application import MIMEApplication
25from email.mime.audio import MIMEAudio
26from email.mime.text import MIMEText
27from email.mime.image import MIMEImage
28from email.mime.base import MIMEBase
29from email.mime.message import MIMEMessage
30from email.mime.multipart import MIMEMultipart
31from email import utils
32from email import errors
33from email import encoders
34from email import iterators
35from email import base64mime
36from email import quoprimime
37
R. David Murray96fd54e2010-10-08 15:55:28 +000038from test.support import findfile, run_unittest, unlink
Guido van Rossum8b3febe2007-08-30 01:15:14 +000039from email.test import __file__ as landmark
40
41
42NL = '\n'
43EMPTYSTRING = ''
44SPACE = ' '
45
46
47
def openfile(filename, *args, **kws):
    """Open *filename* from the 'data' directory next to the test package.

    Any extra positional and keyword arguments are passed through to the
    built-in open().
    """
    data_dir = os.path.join(os.path.dirname(landmark), 'data')
    return open(os.path.join(data_dir, filename), *args, **kws)
51
52
53
54# Base test class
class TestEmailBase(unittest.TestCase):
    def ndiffAssertEqual(self, first, second):
        """Like assertEqual except use ndiff for readable output."""
        if first != second:
            # Compare the repr() of each line so that whitespace and
            # escape differences are visible in the diff.
            left = list(map(repr, str(first).splitlines()))
            right = list(map(repr, str(second).splitlines()))
            delta = NL.join(difflib.ndiff(left, right))
            raise self.failureException(NL + delta)

    def _msgobj(self, filename):
        # Parse the named fixture file into a message object.
        path = findfile(filename)
        with openfile(path) as fp:
            return email.message_from_file(fp)
69
70
71
72# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    # Test methods are described with comments rather than docstrings so
    # that unittest keeps reporting them by method name.

    def test_get_all(self):
        # get_all() returns every value of a repeated header, or the
        # supplied default when the header is absent.
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        # Setting a charset adds the MIME headers; clearing it removes the
        # charset parameter; pre-existing MIME headers are preserved.
        eq = self.assertEqual
        msg = Message()
        eq(msg.get_charset(), None)
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        eq(msg.get_charset(), None)
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        # set_charset() also accepts a charset name given as a string.
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        # A charset passed to set_payload() is recorded on the message.
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_get_charsets(self):
        # get_charsets() lists one entry per part, using the failobj
        # (None by default) where a part declares no charset.
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        # Read the fixture inside a with-block so the file is always closed.
        with openfile(findfile('msg_46.txt')) as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, True, 0)
        gen.flatten(msg, False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        eq(msg.get_payload(decode=True), None)
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            # Assigning a header appends a new field and lookups return the
            # first match, so drop the previous value first; otherwise only
            # 'x-uuencode' would ever be exercised.
            del msg['content-transfer-encoding']
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_decoded_generator(self):
        # Flattening msg_07.txt with DecodedGenerator must produce the
        # text stored in msg_17.txt.
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        self.assertIn('from', msg)
        self.assertIn('From', msg)
        self.assertIn('FROM', msg)
        self.assertIn('to', msg)
        self.assertIn('To', msg)
        self.assertIn('TO', msg)

    def test_as_string(self):
        # str(msg) reproduces the source text; as_string(unixfrom=True)
        # prepends a single 'From ' envelope line.
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        eq(text, str(msg))
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        self.assertTrue(lines[0].startswith('From '))
        eq(text, NL.join(lines[1:]))

    def test_bad_param(self):
        # A parameter without a value is reported as the empty string.
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_filename(), None)

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_boundary(), None)

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        # No Content-Type header, so the default lookup finds nothing.
        eq(msg.get_params(), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    def test_get_param_liberal(self):
        # Whitespace around '=' in a parameter is tolerated.
        msg = Message()
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        eq(msg.get_param('quuz', header='x-header'), None)
        eq(msg.get_param('quuz'), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    def test_get_param_with_semis_in_quotes(self):
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    def test_get_param_with_quotes(self):
        # Escaped quotes inside continued (bar*0/bar*1) parameter values
        # are unescaped when the pieces are joined.
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        # Header membership tests are case-insensitive.
        msg = email.message_from_string('Header: exists')
        self.assertIn('header', msg)
        self.assertIn('Header', msg)
        self.assertIn('HEADER', msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        # Deleting a parameter and setting it again moves it to the end.
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_set_type(self):
        # set_type() rejects a value without a '/', and keeps existing
        # parameters when the type is changed.
        eq = self.assertEqual
        msg = Message()
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        # A Content-Type value with no '/' falls back to 'text'.
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        # A Content-Type value with no '/' falls back to 'plain'.
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        # replace_header() rewrites the first matching header in place,
        # keeping header order, and raises KeyError when there is no match.
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    def test_broken_base64_payload(self):
        # Undecodable base64 is handed back as the raw payload bytes.
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000511
512
513
514# Test the email.encoders module
class TestEncoders(unittest.TestCase):
    def test_encode_empty_payload(self):
        # A us-ascii charset on a message with no payload gives 7bit.
        message = Message()
        message.set_charset('us-ascii')
        self.assertEqual(message['content-transfer-encoding'], '7bit')

    def test_default_cte(self):
        check = self.assertEqual
        # 7bit data and the default us-ascii _charset
        plain = MIMEText('hello world')
        check(plain['content-transfer-encoding'], '7bit')
        # Similar, but with 8bit data
        eight_bit = MIMEText('hello \xf8 world')
        check(eight_bit['content-transfer-encoding'], '8bit')
        # And now with a different charset
        latin1 = MIMEText('hello \xf8 world', _charset='iso-8859-1')
        check(latin1['content-transfer-encoding'], 'quoted-printable')

    def test_encode7or8bit(self):
        # Make sure a charset whose input character set is 8bit but
        # whose output character set is 7bit gets a transfer-encoding
        # of 7bit.
        message = MIMEText('文', _charset='euc-jp')
        self.assertEqual(message['content-transfer-encoding'], '7bit')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000541
542
543# Test long header wrapping
544class TestLongHeaders(TestEmailBase):
545 def test_split_long_continuation(self):
546 eq = self.ndiffAssertEqual
547 msg = email.message_from_string("""\
548Subject: bug demonstration
549\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
550\tmore text
551
552test
553""")
554 sfp = StringIO()
555 g = Generator(sfp)
556 g.flatten(msg)
557 eq(sfp.getvalue(), """\
558Subject: bug demonstration
559\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
560\tmore text
561
562test
563""")
564
565 def test_another_long_almost_unsplittable_header(self):
566 eq = self.ndiffAssertEqual
567 hstr = """\
568bug demonstration
569\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
570\tmore text"""
571 h = Header(hstr, continuation_ws='\t')
572 eq(h.encode(), """\
573bug demonstration
574\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
575\tmore text""")
576 h = Header(hstr.replace('\t', ' '))
577 eq(h.encode(), """\
578bug demonstration
579 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
580 more text""")
581
582 def test_long_nonstring(self):
583 eq = self.ndiffAssertEqual
584 g = Charset("iso-8859-1")
585 cz = Charset("iso-8859-2")
586 utf8 = Charset("utf-8")
587 g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
588 b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
589 b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
590 b'bef\xf6rdert. ')
591 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
592 b'd\xf9vtipu.. ')
593 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
594 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
595 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
596 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
597 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
598 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
599 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
600 '\u3044\u307e\u3059\u3002')
601 h = Header(g_head, g, header_name='Subject')
602 h.append(cz_head, cz)
603 h.append(utf8_head, utf8)
604 msg = Message()
605 msg['Subject'] = h
606 sfp = StringIO()
607 g = Generator(sfp)
608 g.flatten(msg)
609 eq(sfp.getvalue(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000610Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
611 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
612 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
613 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
614 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
615 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
616 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
617 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
618 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
619 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
620 =?utf-8?b?44CC?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000621
622""")
Guido van Rossum9604e662007-08-30 03:46:43 +0000623 eq(h.encode(maxlinelen=76), """\
624=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
625 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
626 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
627 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
628 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
629 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
630 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
631 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
632 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
633 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
634 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000635
636 def test_long_header_encode(self):
637 eq = self.ndiffAssertEqual
638 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
639 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
640 header_name='X-Foobar-Spoink-Defrobnit')
641 eq(h.encode(), '''\
642wasnipoop; giraffes="very-long-necked-animals";
643 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
644
645 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
646 eq = self.ndiffAssertEqual
647 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
648 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
649 header_name='X-Foobar-Spoink-Defrobnit',
650 continuation_ws='\t')
651 eq(h.encode(), '''\
652wasnipoop; giraffes="very-long-necked-animals";
653 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
654
655 def test_long_header_encode_with_tab_continuation(self):
656 eq = self.ndiffAssertEqual
657 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
658 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
659 header_name='X-Foobar-Spoink-Defrobnit',
660 continuation_ws='\t')
661 eq(h.encode(), '''\
662wasnipoop; giraffes="very-long-necked-animals";
663\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
664
665 def test_header_splitter(self):
666 eq = self.ndiffAssertEqual
667 msg = MIMEText('')
668 # It'd be great if we could use add_header() here, but that doesn't
669 # guarantee an order of the parameters.
670 msg['X-Foobar-Spoink-Defrobnit'] = (
671 'wasnipoop; giraffes="very-long-necked-animals"; '
672 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
673 sfp = StringIO()
674 g = Generator(sfp)
675 g.flatten(msg)
676 eq(sfp.getvalue(), '''\
677Content-Type: text/plain; charset="us-ascii"
678MIME-Version: 1.0
679Content-Transfer-Encoding: 7bit
680X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
681 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
682
683''')
684
685 def test_no_semis_header_splitter(self):
686 eq = self.ndiffAssertEqual
687 msg = Message()
688 msg['From'] = 'test@dom.ain'
689 msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
690 msg.set_payload('Test')
691 sfp = StringIO()
692 g = Generator(sfp)
693 g.flatten(msg)
694 eq(sfp.getvalue(), """\
695From: test@dom.ain
696References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
697 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>
698
699Test""")
700
701 def test_no_split_long_header(self):
702 eq = self.ndiffAssertEqual
703 hstr = 'References: ' + 'x' * 80
Guido van Rossum9604e662007-08-30 03:46:43 +0000704 h = Header(hstr)
705 # These come on two lines because Headers are really field value
706 # classes and don't really know about their field names.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000707 eq(h.encode(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000708References:
709 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
710 h = Header('x' * 80)
711 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000712
713 def test_splitting_multiple_long_lines(self):
714 eq = self.ndiffAssertEqual
715 hstr = """\
716from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
717\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
718\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
719"""
720 h = Header(hstr, continuation_ws='\t')
721 eq(h.encode(), """\
722from babylon.socal-raves.org (localhost [127.0.0.1]);
723 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
724 for <mailman-admin@babylon.socal-raves.org>;
725 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
726\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
727 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
728 for <mailman-admin@babylon.socal-raves.org>;
729 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
730\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
731 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
732 for <mailman-admin@babylon.socal-raves.org>;
733 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")
734
735 def test_splitting_first_line_only_is_long(self):
736 eq = self.ndiffAssertEqual
737 hstr = """\
738from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
739\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
740\tid 17k4h5-00034i-00
741\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
742 h = Header(hstr, maxlinelen=78, header_name='Received',
743 continuation_ws='\t')
744 eq(h.encode(), """\
745from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
746 helo=cthulhu.gerg.ca)
747\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
748\tid 17k4h5-00034i-00
749\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")
750
751 def test_long_8bit_header(self):
752 eq = self.ndiffAssertEqual
753 msg = Message()
754 h = Header('Britische Regierung gibt', 'iso-8859-1',
755 header_name='Subject')
756 h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
Guido van Rossum9604e662007-08-30 03:46:43 +0000757 eq(h.encode(maxlinelen=76), """\
758=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
759 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000760 msg['Subject'] = h
Guido van Rossum9604e662007-08-30 03:46:43 +0000761 eq(msg.as_string(maxheaderlen=76), """\
762Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
763 =?iso-8859-1?q?hore-Windkraftprojekte?=
764
765""")
766 eq(msg.as_string(maxheaderlen=0), """\
767Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000768
769""")
770
771 def test_long_8bit_header_no_charset(self):
772 eq = self.ndiffAssertEqual
773 msg = Message()
Barry Warsaw8c571042007-08-30 19:17:18 +0000774 header_string = ('Britische Regierung gibt gr\xfcnes Licht '
775 'f\xfcr Offshore-Windkraftprojekte '
776 '<a-very-long-address@example.com>')
777 msg['Reply-To'] = header_string
778 self.assertRaises(UnicodeEncodeError, msg.as_string)
779 msg = Message()
780 msg['Reply-To'] = Header(header_string, 'utf-8',
781 header_name='Reply-To')
782 eq(msg.as_string(maxheaderlen=78), """\
783Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
784 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000785
786""")
787
788 def test_long_to_header(self):
789 eq = self.ndiffAssertEqual
790 to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
791 '<someone@eecs.umich.edu>,'
792 '"Someone Test #B" <someone@umich.edu>, '
793 '"Someone Test #C" <someone@eecs.umich.edu>, '
794 '"Someone Test #D" <someone@eecs.umich.edu>')
795 msg = Message()
796 msg['To'] = to
797 eq(msg.as_string(maxheaderlen=78), '''\
Guido van Rossum9604e662007-08-30 03:46:43 +0000798To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
Barry Warsaw70d61ce2009-03-30 23:12:30 +0000799 "Someone Test #B" <someone@umich.edu>,
Guido van Rossum9604e662007-08-30 03:46:43 +0000800 "Someone Test #C" <someone@eecs.umich.edu>,
801 "Someone Test #D" <someone@eecs.umich.edu>
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000802
803''')
804
805 def test_long_line_after_append(self):
806 eq = self.ndiffAssertEqual
807 s = 'This is an example of string which has almost the limit of header length.'
808 h = Header(s)
809 h.append('Add another line.')
Guido van Rossum9604e662007-08-30 03:46:43 +0000810 eq(h.encode(maxlinelen=76), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000811This is an example of string which has almost the limit of header length.
812 Add another line.""")
813
814 def test_shorter_line_with_append(self):
815 eq = self.ndiffAssertEqual
816 s = 'This is a shorter line.'
817 h = Header(s)
818 h.append('Add another sentence. (Surprise?)')
819 eq(h.encode(),
820 'This is a shorter line. Add another sentence. (Surprise?)')
821
822 def test_long_field_name(self):
823 eq = self.ndiffAssertEqual
824 fn = 'X-Very-Very-Very-Long-Header-Name'
Guido van Rossum9604e662007-08-30 03:46:43 +0000825 gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
826 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
827 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
828 'bef\xf6rdert. ')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000829 h = Header(gs, 'iso-8859-1', header_name=fn)
830 # BAW: this seems broken because the first line is too long
Guido van Rossum9604e662007-08-30 03:46:43 +0000831 eq(h.encode(maxlinelen=76), """\
832=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
833 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
834 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
835 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000836
837 def test_long_received_header(self):
838 h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
839 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
840 'Wed, 05 Mar 2003 18:10:18 -0700')
841 msg = Message()
842 msg['Received-1'] = Header(h, continuation_ws='\t')
843 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +0000844 # This should be splitting on spaces not semicolons.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000845 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Barry Warsawbef9d212007-08-31 10:55:37 +0000846Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
847 Wed, 05 Mar 2003 18:10:18 -0700
848Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
849 Wed, 05 Mar 2003 18:10:18 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000850
851""")
852
853 def test_string_headerinst_eq(self):
854 h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
855 'tu-muenchen.de> (David Bremner\'s message of '
856 '"Thu, 6 Mar 2003 13:58:21 +0100")')
857 msg = Message()
858 msg['Received-1'] = Header(h, header_name='Received-1',
859 continuation_ws='\t')
860 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +0000861 # XXX This should be splitting on spaces not commas.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000862 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Barry Warsawbef9d212007-08-31 10:55:37 +0000863Received-1: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner's message of \"Thu,
864 6 Mar 2003 13:58:21 +0100\")
865Received-2: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner's message of \"Thu,
866 6 Mar 2003 13:58:21 +0100\")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000867
868""")
869
870 def test_long_unbreakable_lines_with_continuation(self):
871 eq = self.ndiffAssertEqual
872 msg = Message()
873 t = """\
874iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
875 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
876 msg['Face-1'] = t
877 msg['Face-2'] = Header(t, header_name='Face-2')
Barry Warsawbef9d212007-08-31 10:55:37 +0000878 # XXX This splitting is all wrong. It the first value line should be
879 # snug against the field name.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000880 eq(msg.as_string(maxheaderlen=78), """\
Barry Warsawc5a6a302007-08-31 11:19:21 +0000881Face-1:\x20
Barry Warsaw70d61ce2009-03-30 23:12:30 +0000882 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000883 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Barry Warsawc5a6a302007-08-31 11:19:21 +0000884Face-2:\x20
Barry Warsawbef9d212007-08-31 10:55:37 +0000885 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000886 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
887
888""")
889
890 def test_another_long_multiline_header(self):
891 eq = self.ndiffAssertEqual
892 m = ('Received: from siimage.com '
893 '([172.25.1.3]) by zima.siliconimage.com with '
Guido van Rossum9604e662007-08-30 03:46:43 +0000894 'Microsoft SMTPSVC(5.0.2195.4905); '
895 'Wed, 16 Oct 2002 07:41:11 -0700')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000896 msg = email.message_from_string(m)
897 eq(msg.as_string(maxheaderlen=78), '''\
Barry Warsawbef9d212007-08-31 10:55:37 +0000898Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with Microsoft SMTPSVC(5.0.2195.4905);
899 Wed, 16 Oct 2002 07:41:11 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000900
901''')
902
903 def test_long_lines_with_different_header(self):
904 eq = self.ndiffAssertEqual
905 h = ('List-Unsubscribe: '
906 '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
907 ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
908 '?subject=unsubscribe>')
909 msg = Message()
910 msg['List'] = h
911 msg['List'] = Header(h, header_name='List')
912 eq(msg.as_string(maxheaderlen=78), """\
913List: List-Unsubscribe: <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +0000914 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000915List: List-Unsubscribe: <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +0000916 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000917
918""")
919
920
921
922# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    """Check Generator's treatment of body lines that begin with "From "."""

    def setUp(self):
        # One From header plus a two-line body whose first line starts with
        # "From ", which is what the mangle_from_ flag acts on.
        message = Message()
        message['From'] = 'aaa@bbb.org'
        message.set_payload('From the desk of A.A.A.:\nBlah blah blah\n')
        self.msg = message

    def test_mangled_from(self):
        # With mangling on, the body's "From " line gains a leading ">".
        out = StringIO()
        Generator(out, mangle_from_=True).flatten(self.msg)
        self.assertEqual(out.getvalue(),
                         'From: aaa@bbb.org\n'
                         '\n'
                         '>From the desk of A.A.A.:\n'
                         'Blah blah blah\n')

    def test_dont_mangle_from(self):
        # With mangling off, the body is written out unchanged.
        out = StringIO()
        Generator(out, mangle_from_=False).flatten(self.msg)
        self.assertEqual(out.getvalue(),
                         'From: aaa@bbb.org\n'
                         '\n'
                         'From the desk of A.A.A.:\n'
                         'Blah blah blah\n')
953
954
955
956# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    """Exercise MIMEAudio using the audiotest.au sample file."""

    def setUp(self):
        # Make sure we pick up the audiotest.au that lives in email/test/data.
        # In Python, there's an audiotest.au living in Lib/test but that isn't
        # included in some binary distros that don't include the test
        # package.  The trailing empty string on the .join() is significant
        # since findfile() will do a dirname().
        datadir = os.path.join(os.path.dirname(landmark), 'data', '')
        with open(findfile('audiotest.au', datadir), 'rb') as fp:
            self._audiodata = fp.read()
        self._au = MIMEAudio(self._audiodata)

    def test_guess_minor_type(self):
        # No subtype was passed to MIMEAudio in setUp(); 'basic' is the
        # subtype expected for the sample data.
        self.assertEqual(self._au.get_content_type(), 'audio/basic')

    def test_encoding(self):
        # The payload must base64-decode back to the raw file bytes.
        payload = self._au.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._audiodata)

    def test_checkSetMinor(self):
        # An explicit subtype is used verbatim.
        au = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(au.get_content_type(), 'audio/fish')

    def test_add_header(self):
        eq = self.assertEqual
        self._au.add_header('Content-Disposition', 'attachment',
                            filename='audiotest.au')
        eq(self._au['content-disposition'],
           'attachment; filename="audiotest.au"')
        eq(self._au.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'audiotest.au')])
        eq(self._au.get_param('filename', header='content-disposition'),
           'audiotest.au')
        # A fresh list is used as the failobj sentinel so identity, not
        # equality, proves the default was returned.  assertIs reports both
        # operands on failure instead of "False is not true".
        missing = []
        eq(self._au.get_param('attachment', header='content-disposition'), '')
        self.assertIs(self._au.get_param('foo', failobj=missing,
                                         header='content-disposition'),
                      missing)
        # Try some missing stuff
        self.assertIs(self._au.get_param('foobar', missing), missing)
        self.assertIs(self._au.get_param('attachment', missing,
                                         header='foobar'),
                      missing)
1000
1001
1002
1003# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    """Exercise MIMEImage using the PyBanner048.gif sample file."""

    def setUp(self):
        with openfile('PyBanner048.gif', 'rb') as fp:
            self._imgdata = fp.read()
        self._im = MIMEImage(self._imgdata)

    def test_guess_minor_type(self):
        # No subtype was passed to MIMEImage in setUp(); 'gif' is the
        # subtype expected for the sample data.
        self.assertEqual(self._im.get_content_type(), 'image/gif')

    def test_encoding(self):
        # The payload must base64-decode back to the raw file bytes.
        payload = self._im.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._imgdata)

    def test_checkSetMinor(self):
        # An explicit subtype is used verbatim.
        im = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(im.get_content_type(), 'image/fish')

    def test_add_header(self):
        eq = self.assertEqual
        self._im.add_header('Content-Disposition', 'attachment',
                            filename='dingusfish.gif')
        eq(self._im['content-disposition'],
           'attachment; filename="dingusfish.gif"')
        eq(self._im.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'dingusfish.gif')])
        eq(self._im.get_param('filename', header='content-disposition'),
           'dingusfish.gif')
        # A fresh list is used as the failobj sentinel so identity, not
        # equality, proves the default was returned.  assertIs reports both
        # operands on failure instead of "False is not true".
        missing = []
        eq(self._im.get_param('attachment', header='content-disposition'), '')
        self.assertIs(self._im.get_param('foo', failobj=missing,
                                         header='content-disposition'),
                      missing)
        # Try some missing stuff
        self.assertIs(self._im.get_param('foobar', missing), missing)
        self.assertIs(self._im.get_param('attachment', missing,
                                         header='foobar'),
                      missing)
1041
1042
1043
1044# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    """Check the defaults MIMEApplication applies to raw binary data."""

    def test_headers(self):
        eq = self.assertEqual
        msg = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        eq(msg.get_content_type(), 'application/octet-stream')
        eq(msg['content-transfer-encoding'], 'base64')

    def test_body(self):
        eq = self.assertEqual
        # Named 'data' rather than 'bytes' so the builtin is not shadowed.
        data = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(data)
        # Whitespace around a base64 body is not significant, so compare the
        # stripped payload; this keeps the check independent of whether the
        # encoder appends a trailing newline.
        eq(msg.get_payload().strip(), '+vv8/f7/')
        eq(msg.get_payload(decode=True), data)
1058
1059
1060
1061# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    """Check MIMEText's content type, charset parameter and payload."""

    def setUp(self):
        self._msg = MIMEText('hello there')

    def test_types(self):
        eq = self.assertEqual
        eq(self._msg.get_content_type(), 'text/plain')
        eq(self._msg.get_param('charset'), 'us-ascii')
        # A fresh list is used as the failobj sentinel so identity proves
        # the default was returned; assertIs reports both operands on
        # failure.
        missing = []
        self.assertIs(self._msg.get_param('foobar', missing), missing)
        self.assertIs(self._msg.get_param('charset', missing,
                                          header='foobar'),
                      missing)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there')
        # Here the object returned by get_charset() is compared directly
        # with a plain string.
        eq(msg.get_charset(), 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        self.assertIn('hello there', msg.as_string())

    def test_utf8_input(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        eq = self.assertEqual
        msg = MIMEText(teststr, _charset='utf-8')
        eq(msg.get_charset().output_charset, 'utf-8')
        eq(msg['content-type'], 'text/plain; charset="utf-8"')
        eq(msg.get_payload(decode=True), teststr.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
                   "will fix in email6")
    def test_utf8_input_no_charset(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
1112
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001113
1114
1115# Test complicated multipart/* messages
class TestMultipart(TestEmailBase):
    """Build, parse and regenerate multipart/* messages."""

    def setUp(self):
        # Fixture: a multipart/mixed container holding a short text part and
        # a GIF attachment, with From/To/Subject/Date headers.
        with openfile('PyBanner048.gif', 'rb') as fp:
            data = fp.read()
        container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
        image = MIMEImage(data, name='dingusfish.gif')
        image.add_header('content-disposition', 'attachment',
                         filename='dingusfish.gif')
        intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
        container.attach(intro)
        container.attach(image)
        container['From'] = 'Barry <barry@digicool.com>'
        container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
        container['Subject'] = 'Here is your dingus fish'

        now = 987809702.54848599
        timetuple = time.localtime(now)
        if timetuple[-1] == 0:
            tzsecs = time.timezone
        else:
            tzsecs = time.altzone
        # A positive tzsecs gets a '-' sign.  The magnitude is split into
        # hours and minutes explicitly so that negative values and offsets
        # that are not a whole number of hours both format as four digits.
        sign = '-' if tzsecs > 0 else '+'
        hours, minutes = divmod(abs(tzsecs) // 60, 60)
        tzoffset = ' %s%02d%02d' % (sign, hours, minutes)
        container['Date'] = time.strftime(
            '%a, %d %b %Y %H:%M:%S', timetuple) + tzoffset
        self._msg = container
        self._im = image
        self._txt = intro

    def test_hierarchy(self):
        # convenience
        eq = self.assertEqual
        raises = self.assertRaises
        # tests
        m = self._msg
        self.assertTrue(m.is_multipart())
        eq(m.get_content_type(), 'multipart/mixed')
        eq(len(m.get_payload()), 2)
        raises(IndexError, m.get_payload, 2)
        m0 = m.get_payload(0)
        m1 = m.get_payload(1)
        # The container must hand back the very objects that were attached.
        self.assertIs(m0, self._txt)
        self.assertIs(m1, self._im)
        eq(m.get_payload(), [m0, m1])
        self.assertFalse(m0.is_multipart())
        self.assertFalse(m1.is_multipart())

    def test_empty_multipart_idempotent(self):
        # Parsing then regenerating must reproduce the text exactly.
        text = """\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY


--BOUNDARY--
"""
        msg = Parser().parsestr(text)
        self.ndiffAssertEqual(text, msg.as_string())

    def test_no_parts_in_a_multipart_with_none_epilogue(self):
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY

--BOUNDARY--''')

    def test_no_parts_in_a_multipart_with_empty_epilogue(self):
        # Empty-string preamble and epilogue each contribute one extra
        # newline to the expected output.
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = ''
        outer.epilogue = ''
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY

--BOUNDARY--
''')

    def test_one_part_in_a_multipart(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.set_boundary('BOUNDARY')
        msg = MIMEText('hello world')
        outer.attach(msg)
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_preamble(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_preamble(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = '\n'
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--

''')

    def test_message_external_body(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_36.txt')
        eq(len(msg.get_payload()), 2)
        msg1 = msg.get_payload(1)
        eq(msg1.get_content_type(), 'multipart/alternative')
        eq(len(msg1.get_payload()), 2)
        for subpart in msg1.get_payload():
            eq(subpart.get_content_type(), 'message/external-body')
            eq(len(subpart.get_payload()), 1)
            subsubpart = subpart.get_payload(0)
            eq(subsubpart.get_content_type(), 'text/plain')

    def test_double_boundary(self):
        # msg_37.txt is a multipart that contains two dash-boundary's in a
        # row.  Our interpretation of RFC 2046 calls for ignoring the second
        # and subsequent boundaries.
        msg = self._msgobj('msg_37.txt')
        self.assertEqual(len(msg.get_payload()), 3)

    def test_nested_inner_contains_outer_boundary(self):
        eq = self.ndiffAssertEqual
        # msg_38.txt has an inner part that contains outer boundaries.  My
        # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
        # these are illegal and should be interpreted as unterminated inner
        # parts.
        msg = self._msgobj('msg_38.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): _structure() indents four spaces per nesting level;
        # the depths below are reconstructed and should be confirmed against
        # msg_38.txt.
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
            text/plain
        text/plain
    text/plain
    text/plain
""")

    def test_nested_with_same_boundary(self):
        eq = self.ndiffAssertEqual
        # msg 39.txt is similarly evil in that it's got inner parts that use
        # the same boundary as outer parts.  Again, I believe the way this is
        # parsed is closest to the spirit of RFC 2046
        msg = self._msgobj('msg_39.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): _structure() indents four spaces per nesting level;
        # the depths below are reconstructed and should be confirmed against
        # msg_39.txt.
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
        application/octet-stream
        application/octet-stream
    text/plain
""")

    def test_boundary_in_non_multipart(self):
        # A boundary parameter on a non-multipart type: the message is
        # expected to be regenerated unchanged.
        msg = self._msgobj('msg_40.txt')
        self.assertEqual(msg.as_string(), '''\
MIME-Version: 1.0
Content-Type: text/html; boundary="--961284236552522269"

----961284236552522269
Content-Type: text/html;
Content-Transfer-Encoding: 7Bit

<html></html>

----961284236552522269--
''')

    def test_boundary_with_leading_space(self):
        eq = self.assertEqual
        # NOTE(review): the boundary's leading whitespace is reproduced as a
        # single space throughout; confirm the width against the upstream
        # file.
        msg = email.message_from_string('''\
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary=" XXXX"

-- XXXX
Content-Type: text/plain


-- XXXX
Content-Type: text/plain

-- XXXX--
''')
        self.assertTrue(msg.is_multipart())
        eq(msg.get_boundary(), ' XXXX')
        eq(len(msg.get_payload()), 2)

    def test_boundary_without_trailing_newline(self):
        # The closing boundary is the very last thing in the input, with no
        # newline after it.
        m = Parser().parsestr("""\
Content-Type: multipart/mixed; boundary="===============0012394164=="
MIME-Version: 1.0

--===============0012394164==
Content-Type: image/file1.jpg
MIME-Version: 1.0
Content-Transfer-Encoding: base64

YXNkZg==
--===============0012394164==--""")
        # assertEqual, not the deprecated assertEquals alias.
        self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
1487
1488
1489
1490# Test some badly formatted messages
class TestNonConformant(TestEmailBase):
    """Parse badly formed messages and check the recorded defects."""

    def test_parse_missing_minor_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_14.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')

    def test_same_boundary_inner_outer(self):
        msg = self._msgobj('msg_15.txt')
        # XXX We can probably eventually do better
        inner = msg.get_payload(0)
        self.assertTrue(hasattr(inner, 'defects'))
        self.assertEqual(len(inner.defects), 1)
        # assertIsInstance names the actual type when the check fails.
        self.assertIsInstance(inner.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    def test_multipart_no_boundary(self):
        msg = self._msgobj('msg_25.txt')
        self.assertIsInstance(msg.get_payload(), str)
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    def test_invalid_content_type(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        msg = Message()
        # RFC 2045, $5.2 says invalid yields text/plain
        msg['Content-Type'] = 'text'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Clear the old value and try something /really/ invalid
        del msg['content-type']
        msg['Content-Type'] = 'foo'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Still, make sure that the message is idempotently generated
        s = StringIO()
        g = Generator(s)
        g.flatten(msg)
        neq(s.getvalue(), 'Content-Type: foo\n\n')

    def test_no_start_boundary(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_31.txt')
        # The whole would-be multipart body is expected to come back as one
        # string payload.
        eq(msg.get_payload(), """\
--BOUNDARY
Content-Type: text/plain

message 1

--BOUNDARY
Content-Type: text/plain

message 2

--BOUNDARY--
""")

    def test_no_separating_blank_line(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_35.txt')
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: here's something interesting

counter to RFC 2822, there's no separating newline here
""")

    def test_lying_multipart(self):
        msg = self._msgobj('msg_41.txt')
        self.assertTrue(hasattr(msg, 'defects'))
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    def test_missing_start_boundary(self):
        outer = self._msgobj('msg_42.txt')
        # The message structure is:
        #
        # multipart/mixed
        #    text/plain
        #    message/rfc822
        #        multipart/mixed [*]
        #
        # [*] This message is missing its start boundary
        bad = outer.get_payload(1).get_payload(0)
        self.assertEqual(len(bad.defects), 1)
        self.assertIsInstance(bad.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    def test_first_line_is_continuation_header(self):
        eq = self.assertEqual
        m = ' Line 1\nLine 2\nLine 3'
        msg = email.message_from_string(m)
        # The leading continuation line yields no header and is recorded as
        # a defect that carries the offending line.
        eq(msg.keys(), [])
        eq(msg.get_payload(), 'Line 2\nLine 3')
        eq(len(msg.defects), 1)
        self.assertIsInstance(msg.defects[0],
                              errors.FirstHeaderLineIsContinuationDefect)
        eq(msg.defects[0].line, ' Line 1\n')
1601
1602
1603
1604# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    """Decode and re-encode RFC 2047 encoded words in header values."""

    def test_rfc2047_multiline(self):
        # Two encoded words separated by plain text that spans a folded
        # line.
        raw = ('Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz\n'
               ' foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?=')
        chunks = decode_header(raw)
        icelandic = (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')
        self.assertEqual(chunks, [
            (b'Re:', None),
            icelandic,
            (b'baz foo bar', None),
            icelandic,
        ])
        rebuilt = make_header(chunks)
        self.assertEqual(
            str(rebuilt),
            'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        self.ndiffAssertEqual(
            rebuilt.encode(maxlinelen=76),
            'Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar '
            '=?mac-iceland?q?r=8Aksm?=\n'
            ' =?mac-iceland?q?=9Arg=8Cs?=')

    def test_whitespace_eater_unicode(self):
        encoded = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        parts = decode_header(encoded)
        self.assertEqual(parts, [(b'Andr\xe9', 'iso-8859-1'),
                                 (b'Pirard <pirard@dom.ain>', None)])
        self.assertEqual(str(make_header(parts)),
                         'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_eater_unicode_2(self):
        encoded = ('The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= '
                   'jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?=')
        parts = decode_header(encoded)
        self.assertEqual(parts, [
            (b'The', None),
            (b'quick brown fox', 'iso-8859-1'),
            (b'jumped over the', None),
            (b'lazy dog', 'iso-8859-1'),
        ])
        self.assertEqual(str(make_header(parts)),
                         'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        # Encoded words with no surrounding whitespace are expected to be
        # left undecoded.
        raw = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        self.assertEqual(decode_header(raw), [(raw, None)])

    def test_rfc2047_with_whitespace(self):
        raw = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        self.assertEqual(decode_header(raw), [
            (b'Sm', None),
            (b'\xf6', 'iso-8859-1'),
            (b'rg', None),
            (b'\xe5', 'iso-8859-1'),
            (b'sbord', None),
        ])

    def test_rfc2047_B_bad_padding(self):
        template = '=?iso-8859-1?B?%s?='
        # only test complete bytes
        cases = [
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi'),
        ]
        for encoded, decoded in cases:
            result = decode_header(template % encoded)
            self.assertEqual(result, [(decoded, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004.
        raw = '=?iso-8659-1?Q?andr=e9=zz?='
        self.assertEqual(decode_header(raw),
                         [(b'andr\xe9=zz', 'iso-8659-1')])
1668
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001669
1670# Test the MIMEMessage class
class TestMIMEMessage(TestEmailBase):
    """Tests for MIMEMessage and for parsing/generating message/* parts."""

    def setUp(self):
        # Keep the raw text of the sample message/rfc822 document around.
        with openfile('msg_11.txt') as fp:
            self._text = fp.read()

    def test_type_error(self):
        # Passing a plain string instead of a Message raises TypeError.
        self.assertRaises(TypeError, MIMEMessage, 'a plain string')

    def test_valid_argument(self):
        eq = self.assertEqual
        subject = 'A sub-message'
        m = Message()
        m['Subject'] = subject
        r = MIMEMessage(m)
        eq(r.get_content_type(), 'message/rfc822')
        # The payload is a one element list holding the very same object
        # that was passed to the constructor.
        payload = r.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subpart = payload[0]
        self.assertIs(subpart, m)
        eq(subpart['subject'], subject)

    def test_bad_multipart(self):
        # Attaching a second message to a MIMEMessage must fail.
        msg1 = Message()
        msg1['Subject'] = 'subpart 1'
        msg2 = Message()
        msg2['Subject'] = 'subpart 2'
        r = MIMEMessage(msg1)
        self.assertRaises(errors.MultipartConversionError, r.attach, msg2)

    def test_generate(self):
        # First craft the message to be encapsulated
        m = Message()
        m['Subject'] = 'An enclosed message'
        m.set_payload('Here is the body of the message.\n')
        r = MIMEMessage(m)
        r['Subject'] = 'The enclosing message'
        s = StringIO()
        g = Generator(s)
        g.flatten(r)
        self.assertEqual(s.getvalue(), """\
Content-Type: message/rfc822
MIME-Version: 1.0
Subject: The enclosing message

Subject: An enclosed message

Here is the body of the message.
""")

    def test_parse_message_rfc822(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_11.txt')
        eq(msg.get_content_type(), 'message/rfc822')
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        submsg = payload[0]
        self.assertIsInstance(submsg, Message)
        eq(submsg['subject'], 'An enclosed message')
        eq(submsg.get_payload(), 'Here is the body of the message.\n')

    def test_dsn(self):
        eq = self.assertEqual
        # msg 16 is a Delivery Status Notification, see RFC 1894
        msg = self._msgobj('msg_16.txt')
        eq(msg.get_content_type(), 'multipart/report')
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 3)
        # Subpart 1 is a text/plain, human readable section
        subpart = msg.get_payload(0)
        eq(subpart.get_content_type(), 'text/plain')
        eq(subpart.get_payload(), """\
This report relates to a message you sent with the following header fields:

  Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
  Date: Sun, 23 Sep 2001 20:10:55 -0700
  From: "Ian T. Henry" <henryi@oxy.edu>
  To: SoCal Raves <scr@socal-raves.org>
  Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

  Recipient address: jangel1@cougar.noc.ucla.edu
  Reason: recipient reached disk quota

""")
        # Subpart 2 contains the machine parsable DSN information.  It
        # consists of two blocks of headers, represented by two nested Message
        # objects.
        subpart = msg.get_payload(1)
        eq(subpart.get_content_type(), 'message/delivery-status')
        eq(len(subpart.get_payload()), 2)
        # message/delivery-status should treat each block as a bunch of
        # headers, i.e. a bunch of Message objects.
        dsn1 = subpart.get_payload(0)
        self.assertIsInstance(dsn1, Message)
        eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
        eq(dsn1.get_param('dns', header='reporting-mta'), '')
        # Try a missing one <wink>
        eq(dsn1.get_param('nsd', header='reporting-mta'), None)
        dsn2 = subpart.get_payload(1)
        self.assertIsInstance(dsn2, Message)
        eq(dsn2['action'], 'failed')
        eq(dsn2.get_params(header='original-recipient'),
           [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
        eq(dsn2.get_param('rfc822', header='final-recipient'), '')
        # Subpart 3 is the original message
        subpart = msg.get_payload(2)
        eq(subpart.get_content_type(), 'message/rfc822')
        payload = subpart.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subsubpart = payload[0]
        self.assertIsInstance(subsubpart, Message)
        eq(subsubpart.get_content_type(), 'text/plain')
        eq(subsubpart['message-id'],
           '<002001c144a6$8752e060$56104586@oxy.edu>')

    def test_epilogue(self):
        # A hand-built multipart with preamble and epilogue must flatten
        # to exactly the text stored in msg_21.txt.
        eq = self.ndiffAssertEqual
        with openfile('msg_21.txt') as fp:
            text = fp.read()
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = 'End of MIME message\n'
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), text)

    def test_no_nl_preamble(self):
        # The preamble has no trailing newline and the epilogue is empty.
        eq = self.ndiffAssertEqual
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = ''
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
""")

    def test_default_type(self):
        eq = self.assertEqual
        with openfile('msg_30.txt') as fp:
            msg = email.message_from_file(fp)
        container1 = msg.get_payload(0)
        eq(container1.get_default_type(), 'message/rfc822')
        eq(container1.get_content_type(), 'message/rfc822')
        container2 = msg.get_payload(1)
        eq(container2.get_default_type(), 'message/rfc822')
        eq(container2.get_content_type(), 'message/rfc822')
        container1a = container1.get_payload(0)
        eq(container1a.get_default_type(), 'text/plain')
        eq(container1a.get_content_type(), 'text/plain')
        container2a = container2.get_payload(0)
        eq(container2a.get_default_type(), 'text/plain')
        eq(container2a.get_content_type(), 'text/plain')

    def test_default_type_with_explicit_container_type(self):
        eq = self.assertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        container1 = msg.get_payload(0)
        eq(container1.get_default_type(), 'message/rfc822')
        eq(container1.get_content_type(), 'message/rfc822')
        container2 = msg.get_payload(1)
        eq(container2.get_default_type(), 'message/rfc822')
        eq(container2.get_content_type(), 'message/rfc822')
        container1a = container1.get_payload(0)
        eq(container1a.get_default_type(), 'text/plain')
        eq(container1a.get_content_type(), 'text/plain')
        container2a = container2.get_payload(0)
        eq(container2a.get_default_type(), 'text/plain')
        eq(container2a.get_content_type(), 'text/plain')

    def test_default_type_non_parsed(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # Set up container
        container = MIMEMultipart('digest', 'BOUNDARY')
        container.epilogue = ''
        # Set up subparts
        subpart1a = MIMEText('message 1\n')
        subpart2a = MIMEText('message 2\n')
        subpart1 = MIMEMessage(subpart1a)
        subpart2 = MIMEMessage(subpart2a)
        container.attach(subpart1)
        container.attach(subpart2)
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(unixfrom=False), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
        # With the explicit headers removed the subparts must still report
        # message/rfc822, now coming from the digest's default type.
        del subpart1['content-type']
        del subpart1['mime-version']
        del subpart2['content-type']
        del subpart2['mime-version']
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(unixfrom=False), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')

    def test_mime_attachments_in_constructor(self):
        # Subparts handed to the constructor show up, in order, as payload.
        eq = self.assertEqual
        text1 = MIMEText('')
        text2 = MIMEText('')
        msg = MIMEMultipart(_subparts=(text1, text2))
        eq(len(msg.get_payload()), 2)
        eq(msg.get_payload(0), text1)
        eq(msg.get_payload(1), text2)

    def test_default_multipart_constructor(self):
        # A MIMEMultipart built without arguments is already multipart.
        msg = MIMEMultipart()
        self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001969
1970
# A general test of parser->model->generator idempotency.  In other words,
# read a message in, parse it into a message object tree, then, without
# touching the tree, regenerate the plain text.  The original text and the
# transformed text should be identical.  Note that we ignore the Unix-From
# line since it may contain a changed date.
class TestIdempotent(TestEmailBase):
    """Parse sample messages and check they regenerate byte-for-byte."""

    def _msgobj(self, filename):
        # Return both the parsed message and the raw text it came from, so
        # callers can compare the regenerated output against the source.
        with openfile(filename) as fp:
            data = fp.read()
        msg = email.message_from_string(data)
        return msg, data

    def _idempotent(self, msg, text):
        # Flatten msg without header wrapping (maxheaderlen=0) and require
        # the output to equal the original text.
        eq = self.ndiffAssertEqual
        s = StringIO()
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg)
        eq(text, s.getvalue())

    def test_parse_text_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_01.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_params()[1], ('charset', 'us-ascii'))
        eq(msg.get_param('charset'), 'us-ascii')
        eq(msg.preamble, None)
        eq(msg.epilogue, None)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        # No Content-Type header: the type defaults to text/plain and there
        # are no parameters to report.
        eq = self.assertEqual
        msg, text = self._msgobj('msg_03.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_params(), None)
        eq(msg.get_param('charset'), None)
        self._idempotent(msg, text)

    def test_simple_multipart(self):
        msg, text = self._msgobj('msg_04.txt')
        self._idempotent(msg, text)

    def test_MIME_digest(self):
        msg, text = self._msgobj('msg_02.txt')
        self._idempotent(msg, text)

    def test_long_header(self):
        msg, text = self._msgobj('msg_27.txt')
        self._idempotent(msg, text)

    def test_MIME_digest_with_part_headers(self):
        msg, text = self._msgobj('msg_28.txt')
        self._idempotent(msg, text)

    def test_mixed_with_image(self):
        msg, text = self._msgobj('msg_06.txt')
        self._idempotent(msg, text)

    def test_multipart_report(self):
        msg, text = self._msgobj('msg_05.txt')
        self._idempotent(msg, text)

    def test_dsn(self):
        msg, text = self._msgobj('msg_16.txt')
        self._idempotent(msg, text)

    def test_preamble_epilogue(self):
        msg, text = self._msgobj('msg_21.txt')
        self._idempotent(msg, text)

    def test_multipart_one_part(self):
        msg, text = self._msgobj('msg_23.txt')
        self._idempotent(msg, text)

    def test_multipart_no_parts(self):
        msg, text = self._msgobj('msg_24.txt')
        self._idempotent(msg, text)

    def test_no_start_boundary(self):
        msg, text = self._msgobj('msg_31.txt')
        self._idempotent(msg, text)

    def test_rfc2231_charset(self):
        msg, text = self._msgobj('msg_32.txt')
        self._idempotent(msg, text)

    def test_more_rfc2231_parameters(self):
        msg, text = self._msgobj('msg_33.txt')
        self._idempotent(msg, text)

    def test_text_plain_in_a_multipart_digest(self):
        msg, text = self._msgobj('msg_34.txt')
        self._idempotent(msg, text)

    def test_nested_multipart_mixeds(self):
        msg, text = self._msgobj('msg_12a.txt')
        self._idempotent(msg, text)

    def test_message_external_body_idempotent(self):
        msg, text = self._msgobj('msg_36.txt')
        self._idempotent(msg, text)

    def test_message_signed_idempotent(self):
        msg, text = self._msgobj('msg_45.txt')
        self._idempotent(msg, text)

    def test_content_type(self):
        eq = self.assertEqual
        # Parse the sample multipart/report message
        msg, text = self._msgobj('msg_05.txt')
        eq(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(msg.get_params())
        eq(params['report-type'], 'delivery-status')
        eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        eq(msg.preamble, 'This is a MIME-encapsulated message.\n')
        eq(msg.epilogue, '\n')
        eq(len(msg.get_payload()), 3)
        # Make sure the subparts are what we expect
        msg1 = msg.get_payload(0)
        eq(msg1.get_content_type(), 'text/plain')
        eq(msg1.get_payload(), 'Yadda yadda yadda\n')
        msg2 = msg.get_payload(1)
        eq(msg2.get_content_type(), 'text/plain')
        eq(msg2.get_payload(), 'Yadda yadda yadda\n')
        msg3 = msg.get_payload(2)
        eq(msg3.get_content_type(), 'message/rfc822')
        self.assertIsInstance(msg3, Message)
        payload = msg3.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg4 = payload[0]
        self.assertIsInstance(msg4, Message)
        eq(msg4.get_payload(), 'Yadda yadda yadda\n')

    def test_parser(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        eq(msg.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and that
        # that submessage has a type of text/plain
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg1 = payload[0]
        self.assertIsInstance(msg1, Message)
        eq(msg1.get_content_type(), 'text/plain')
        self.assertIsInstance(msg1.get_payload(), str)
        eq(msg1.get_payload(), '\n')
2126
2127
2128
2129# Test various other bits of the package's functionality
2130class TestMiscellaneous(TestEmailBase):
2131 def test_message_from_string(self):
2132 with openfile('msg_01.txt') as fp:
2133 text = fp.read()
2134 msg = email.message_from_string(text)
2135 s = StringIO()
2136 # Don't wrap/continue long headers since we're trying to test
2137 # idempotency.
2138 g = Generator(s, maxheaderlen=0)
2139 g.flatten(msg)
2140 self.assertEqual(text, s.getvalue())
2141
2142 def test_message_from_file(self):
2143 with openfile('msg_01.txt') as fp:
2144 text = fp.read()
2145 fp.seek(0)
2146 msg = email.message_from_file(fp)
2147 s = StringIO()
2148 # Don't wrap/continue long headers since we're trying to test
2149 # idempotency.
2150 g = Generator(s, maxheaderlen=0)
2151 g.flatten(msg)
2152 self.assertEqual(text, s.getvalue())
2153
2154 def test_message_from_string_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002155 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002156 with openfile('msg_01.txt') as fp:
2157 text = fp.read()
2158
2159 # Create a subclass
2160 class MyMessage(Message):
2161 pass
2162
2163 msg = email.message_from_string(text, MyMessage)
2164 unless(isinstance(msg, MyMessage))
2165 # Try something more complicated
2166 with openfile('msg_02.txt') as fp:
2167 text = fp.read()
2168 msg = email.message_from_string(text, MyMessage)
2169 for subpart in msg.walk():
2170 unless(isinstance(subpart, MyMessage))
2171
2172 def test_message_from_file_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002173 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002174 # Create a subclass
2175 class MyMessage(Message):
2176 pass
2177
2178 with openfile('msg_01.txt') as fp:
2179 msg = email.message_from_file(fp, MyMessage)
2180 unless(isinstance(msg, MyMessage))
2181 # Try something more complicated
2182 with openfile('msg_02.txt') as fp:
2183 msg = email.message_from_file(fp, MyMessage)
2184 for subpart in msg.walk():
2185 unless(isinstance(subpart, MyMessage))
2186
2187 def test__all__(self):
2188 module = __import__('email')
2189 # Can't use sorted() here due to Python 2.3 compatibility
2190 all = module.__all__[:]
2191 all.sort()
2192 self.assertEqual(all, [
2193 'base64mime', 'charset', 'encoders', 'errors', 'generator',
R. David Murray96fd54e2010-10-08 15:55:28 +00002194 'header', 'iterators', 'message', 'message_from_binary_file',
2195 'message_from_bytes', 'message_from_file',
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002196 'message_from_string', 'mime', 'parser',
2197 'quoprimime', 'utils',
2198 ])
2199
2200 def test_formatdate(self):
2201 now = time.time()
2202 self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
2203 time.gmtime(now)[:6])
2204
2205 def test_formatdate_localtime(self):
2206 now = time.time()
2207 self.assertEqual(
2208 utils.parsedate(utils.formatdate(now, localtime=True))[:6],
2209 time.localtime(now)[:6])
2210
2211 def test_formatdate_usegmt(self):
2212 now = time.time()
2213 self.assertEqual(
2214 utils.formatdate(now, localtime=False),
2215 time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
2216 self.assertEqual(
2217 utils.formatdate(now, localtime=False, usegmt=True),
2218 time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
2219
2220 def test_parsedate_none(self):
2221 self.assertEqual(utils.parsedate(''), None)
2222
2223 def test_parsedate_compact(self):
2224 # The FWS after the comma is optional
2225 self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
2226 utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
2227
2228 def test_parsedate_no_dayofweek(self):
2229 eq = self.assertEqual
2230 eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
2231 (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
2232
2233 def test_parsedate_compact_no_dayofweek(self):
2234 eq = self.assertEqual
2235 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
2236 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2237
2238 def test_parsedate_acceptable_to_time_functions(self):
2239 eq = self.assertEqual
2240 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
2241 t = int(time.mktime(timetup))
2242 eq(time.localtime(t)[:6], timetup[:6])
2243 eq(int(time.strftime('%Y', timetup)), 2003)
2244 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
2245 t = int(time.mktime(timetup[:9]))
2246 eq(time.localtime(t)[:6], timetup[:6])
2247 eq(int(time.strftime('%Y', timetup[:9])), 2003)
2248
R. David Murray219d1c82010-08-25 00:45:55 +00002249 def test_parsedate_y2k(self):
2250 """Test for parsing a date with a two-digit year.
2251
2252 Parsing a date with a two-digit year should return the correct
2253 four-digit year. RFC822 allows two-digit years, but RFC2822 (which
2254 obsoletes RFC822) requires four-digit years.
2255
2256 """
2257 self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
2258 utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
2259 self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
2260 utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
2261
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002262 def test_parseaddr_empty(self):
2263 self.assertEqual(utils.parseaddr('<>'), ('', ''))
2264 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
2265
2266 def test_noquote_dump(self):
2267 self.assertEqual(
2268 utils.formataddr(('A Silly Person', 'person@dom.ain')),
2269 'A Silly Person <person@dom.ain>')
2270
2271 def test_escape_dump(self):
2272 self.assertEqual(
2273 utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
2274 r'"A \(Very\) Silly Person" <person@dom.ain>')
2275 a = r'A \(Special\) Person'
2276 b = 'person@dom.ain'
2277 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2278
2279 def test_escape_backslashes(self):
2280 self.assertEqual(
2281 utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
2282 r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
2283 a = r'Arthur \Backslash\ Foobar'
2284 b = 'person@dom.ain'
2285 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2286
2287 def test_name_with_dot(self):
2288 x = 'John X. Doe <jxd@example.com>'
2289 y = '"John X. Doe" <jxd@example.com>'
2290 a, b = ('John X. Doe', 'jxd@example.com')
2291 self.assertEqual(utils.parseaddr(x), (a, b))
2292 self.assertEqual(utils.parseaddr(y), (a, b))
2293 # formataddr() quotes the name if there's a dot in it
2294 self.assertEqual(utils.formataddr((a, b)), y)
2295
R. David Murray5397e862010-10-02 15:58:26 +00002296 def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
2297 # issue 10005. Note that in the third test the second pair of
2298 # backslashes is not actually a quoted pair because it is not inside a
2299 # comment or quoted string: the address being parsed has a quoted
2300 # string containing a quoted backslash, followed by 'example' and two
2301 # backslashes, followed by another quoted string containing a space and
2302 # the word 'example'. parseaddr copies those two backslashes
2303 # literally. Per rfc5322 this is not technically correct since a \ may
2304 # not appear in an address outside of a quoted string. It is probably
2305 # a sensible Postel interpretation, though.
2306 eq = self.assertEqual
2307 eq(utils.parseaddr('""example" example"@example.com'),
2308 ('', '""example" example"@example.com'))
2309 eq(utils.parseaddr('"\\"example\\" example"@example.com'),
2310 ('', '"\\"example\\" example"@example.com'))
2311 eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
2312 ('', '"\\\\"example\\\\" example"@example.com'))
2313
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002314 def test_multiline_from_comment(self):
2315 x = """\
2316Foo
2317\tBar <foo@example.com>"""
2318 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
2319
2320 def test_quote_dump(self):
2321 self.assertEqual(
2322 utils.formataddr(('A Silly; Person', 'person@dom.ain')),
2323 r'"A Silly; Person" <person@dom.ain>')
2324
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002325 def test_charset_richcomparisons(self):
2326 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002327 ne = self.assertNotEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002328 cset1 = Charset()
2329 cset2 = Charset()
2330 eq(cset1, 'us-ascii')
2331 eq(cset1, 'US-ASCII')
2332 eq(cset1, 'Us-AsCiI')
2333 eq('us-ascii', cset1)
2334 eq('US-ASCII', cset1)
2335 eq('Us-AsCiI', cset1)
2336 ne(cset1, 'usascii')
2337 ne(cset1, 'USASCII')
2338 ne(cset1, 'UsAsCiI')
2339 ne('usascii', cset1)
2340 ne('USASCII', cset1)
2341 ne('UsAsCiI', cset1)
2342 eq(cset1, cset2)
2343 eq(cset2, cset1)
2344
2345 def test_getaddresses(self):
2346 eq = self.assertEqual
2347 eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
2348 'Bud Person <bperson@dom.ain>']),
2349 [('Al Person', 'aperson@dom.ain'),
2350 ('Bud Person', 'bperson@dom.ain')])
2351
2352 def test_getaddresses_nasty(self):
2353 eq = self.assertEqual
2354 eq(utils.getaddresses(['foo: ;']), [('', '')])
2355 eq(utils.getaddresses(
2356 ['[]*-- =~$']),
2357 [('', ''), ('', ''), ('', '*--')])
2358 eq(utils.getaddresses(
2359 ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
2360 [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
2361
2362 def test_getaddresses_embedded_comment(self):
2363 """Test proper handling of a nested comment"""
2364 eq = self.assertEqual
2365 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
2366 eq(addrs[0][1], 'foo@bar.com')
2367
2368 def test_utils_quote_unquote(self):
2369 eq = self.assertEqual
2370 msg = Message()
2371 msg.add_header('content-disposition', 'attachment',
2372 filename='foo\\wacky"name')
2373 eq(msg.get_filename(), 'foo\\wacky"name')
2374
2375 def test_get_body_encoding_with_bogus_charset(self):
2376 charset = Charset('not a charset')
2377 self.assertEqual(charset.get_body_encoding(), 'base64')
2378
2379 def test_get_body_encoding_with_uppercase_charset(self):
2380 eq = self.assertEqual
2381 msg = Message()
2382 msg['Content-Type'] = 'text/plain; charset=UTF-8'
2383 eq(msg['content-type'], 'text/plain; charset=UTF-8')
2384 charsets = msg.get_charsets()
2385 eq(len(charsets), 1)
2386 eq(charsets[0], 'utf-8')
2387 charset = Charset(charsets[0])
2388 eq(charset.get_body_encoding(), 'base64')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00002389 msg.set_payload(b'hello world', charset=charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002390 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
2391 eq(msg.get_payload(decode=True), b'hello world')
2392 eq(msg['content-transfer-encoding'], 'base64')
2393 # Try another one
2394 msg = Message()
2395 msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
2396 charsets = msg.get_charsets()
2397 eq(len(charsets), 1)
2398 eq(charsets[0], 'us-ascii')
2399 charset = Charset(charsets[0])
2400 eq(charset.get_body_encoding(), encoders.encode_7or8bit)
2401 msg.set_payload('hello world', charset=charset)
2402 eq(msg.get_payload(), 'hello world')
2403 eq(msg['content-transfer-encoding'], '7bit')
2404
2405 def test_charsets_case_insensitive(self):
2406 lc = Charset('us-ascii')
2407 uc = Charset('US-ASCII')
2408 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
2409
2410 def test_partial_falls_inside_message_delivery_status(self):
2411 eq = self.ndiffAssertEqual
2412 # The Parser interface provides chunks of data to FeedParser in 8192
2413 # byte gulps. SF bug #1076485 found one of those chunks inside
2414 # message/delivery-status header block, which triggered an
2415 # unreadline() of NeedMoreData.
2416 msg = self._msgobj('msg_43.txt')
2417 sfp = StringIO()
2418 iterators._structure(msg, sfp)
2419 eq(sfp.getvalue(), """\
2420multipart/report
2421 text/plain
2422 message/delivery-status
2423 text/plain
2424 text/plain
2425 text/plain
2426 text/plain
2427 text/plain
2428 text/plain
2429 text/plain
2430 text/plain
2431 text/plain
2432 text/plain
2433 text/plain
2434 text/plain
2435 text/plain
2436 text/plain
2437 text/plain
2438 text/plain
2439 text/plain
2440 text/plain
2441 text/plain
2442 text/plain
2443 text/plain
2444 text/plain
2445 text/plain
2446 text/plain
2447 text/plain
2448 text/plain
2449 text/rfc822-headers
2450""")
2451
2452
2453
2454# Test the iterator/generators
class TestIterators(TestEmailBase):
    """Tests for email.iterators and the feed parser's line buffering."""

    def test_body_line_iterator(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # First a simple non-multipart message
        msg = self._msgobj('msg_01.txt')
        it = iterators.body_line_iterator(msg)
        lines = list(it)
        eq(len(lines), 6)
        neq(EMPTYSTRING.join(lines), msg.get_payload())
        # Now a more complicated multipart
        msg = self._msgobj('msg_02.txt')
        it = iterators.body_line_iterator(msg)
        lines = list(it)
        eq(len(lines), 43)
        with openfile('msg_19.txt') as fp:
            neq(EMPTYSTRING.join(lines), fp.read())

    def test_typed_subpart_iterator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_04.txt')
        it = iterators.typed_subpart_iterator(msg, 'text')
        # One payload per matching subpart.
        lines = [subpart.get_payload() for subpart in it]
        eq(len(lines), 2)
        eq(EMPTYSTRING.join(lines), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_03.txt')
        it = iterators.typed_subpart_iterator(msg, 'text', 'plain')
        # One payload per matching subpart.
        lines = [subpart.get_payload() for subpart in it]
        eq(len(lines), 1)
        eq(EMPTYSTRING.join(lines), """\

Hi,

Do you like this message?

-Me
""")

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
           line endings.  A CR ending one push() followed by a LF starting
           the next push() added an empty line.
        '''
        # Each entry is (chunk to push, number of complete lines that
        # should become readable right after pushing it).
        imt = [
            ("a\r \n", 2),
            ("b", 0),
            ("c\n", 1),
            ("", 0),
            ("d\r\n", 1),
            ("e\r", 0),
            ("\nf", 1),
            ("\r\n", 1),
            ]
        from email.feedparser import BufferedSubFile, NeedMoreData
        bsf = BufferedSubFile()
        om = []
        nt = 0
        for il, n in imt:
            bsf.push(il)
            nt += n
            # Drain every line that is available after this push.
            n1 = 0
            while True:
                ol = bsf.readline()
                if ol is NeedMoreData:
                    break
                om.append(ol)
                n1 += 1
            self.assertEqual(n, n1)
        self.assertEqual(len(om), nt)
        # Nothing may be lost or invented: output equals input.
        self.assertEqual(''.join(il for il, n in imt), ''.join(om))
2541
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002542
2543
class TestParsers(TestEmailBase):
    """Tests for Parser, HeaderParser and the message_from_* entry points."""

    def test_header_parser(self):
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt') as fp:
            msg = HeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        # The body is not parsed into subparts: it stays one string.
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)

    def test_whitespace_continuation(self):
        eq = self.assertEqual
        # This message contains a line after the Subject: header that has only
        # whitespace, but it is not empty!
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: the next line has a space on it
\x20
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        eq = self.assertEqual
        # Like the previous test, but the subject line is the last
        # header.
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam
Subject: the next line has a space on it
\x20

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        eq = self.assertEqual
        # newline='\n' keeps the CRLF line endings of the data file intact.
        with openfile('msg_26.txt', newline='\n') as fp:
            msg = Parser().parse(fp)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'text/plain')
        eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'application/riscos')

    def test_multipart_digest_with_extra_mime_headers(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        eq(msg.is_multipart(), 1)
        eq(len(msg.get_payload()), 2)
        # Both message/rfc822 wrappers get the same checks; only the text
        # of the enclosed message differs.
        for index, body in enumerate(('message 1\n', 'message 2\n')):
            wrapper = msg.get_payload(index)
            eq(wrapper.get_content_type(), 'message/rfc822')
            eq(wrapper.is_multipart(), 1)
            eq(len(wrapper.get_payload()), 1)
            inner = wrapper.get_payload(0)
            eq(inner.is_multipart(), 0)
            eq(inner.get_content_type(), 'text/plain')
            neq(inner.get_payload(), body)

    def test_three_lines(self):
        # A bug report by Andrew McNamara
        lines = ['From: Andrew Person <aperson@dom.ain',
                 'Subject: Test',
                 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
        msg = email.message_from_string(NL.join(lines))
        self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        eq = self.assertEqual
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        value1 = 'text'
        value2 = 'more text'
        m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            value1, value2)
        msg = email.message_from_string(m)
        eq(msg.get('Header'), value1)
        eq(msg.get('Next-Header'), value2)

    def test_rfc2822_header_syntax(self):
        eq = self.assertEqual
        m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg), 3)
        eq(sorted(field for field in msg), ['!"#QUX;~', '>From', 'From'])
        eq(msg.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        eq = self.assertEqual
        m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg.keys()), 0)

    def test_rfc2822_one_character_header(self):
        eq = self.assertEqual
        m = 'A: first header\nB: second header\nCC: third header\n\nbody'
        msg = email.message_from_string(m)
        # sorted() works on any iterable, so this does not rely on keys()
        # handing back a mutable list.
        eq(sorted(msg.keys()), ['A', 'B', 'CC'])
        eq(msg.get_payload(), 'body')

    def test_CRLFLF_at_end_of_part(self):
        # issue 5610: feedparser should not eat two chars from body part ending
        # with "\r\n\n".
        m = (
            "From: foo@bar.com\n"
            "To: baz\n"
            "Mime-Version: 1.0\n"
            "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
            "\n"
            "--BOUNDARY\n"
            "Content-Type: text/plain\n"
            "\n"
            "body ending with CRLF newline\r\n"
            "\n"
            "--BOUNDARY--\n"
        )
        msg = email.message_from_string(m)
        self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002694
2695
R. David Murray96fd54e2010-10-08 15:55:28 +00002696class Test8BitBytesHandling(unittest.TestCase):
2697 # In Python3 all input is string, but that doesn't work if the actual input
2698 # uses an 8bit transfer encoding. To hack around that, in email 5.1 we
2699 # decode byte streams using the surrogateescape error handler, and
2700 # reconvert to binary at appropriate places if we detect surrogates. This
2701 # doesn't allow us to transform headers with 8bit bytes (they get munged),
2702 # but it does allow us to parse and preserve them, and to decode body
2703 # parts that use an 8bit CTE.
2704
2705 bodytest_msg = textwrap.dedent("""\
2706 From: foo@bar.com
2707 To: baz
2708 Mime-Version: 1.0
2709 Content-Type: text/plain; charset={charset}
2710 Content-Transfer-Encoding: {cte}
2711
2712 {bodyline}
2713 """)
2714
2715 def test_known_8bit_CTE(self):
2716 m = self.bodytest_msg.format(charset='utf-8',
2717 cte='8bit',
2718 bodyline='pöstal').encode('utf-8')
2719 msg = email.message_from_bytes(m)
2720 self.assertEqual(msg.get_payload(), "pöstal\n")
2721 self.assertEqual(msg.get_payload(decode=True),
2722 "pöstal\n".encode('utf-8'))
2723
2724 def test_unknown_8bit_CTE(self):
2725 m = self.bodytest_msg.format(charset='notavalidcharset',
2726 cte='8bit',
2727 bodyline='pöstal').encode('utf-8')
2728 msg = email.message_from_bytes(m)
2729 self.assertEqual(msg.get_payload(), "p��stal\n")
2730 self.assertEqual(msg.get_payload(decode=True),
2731 "pöstal\n".encode('utf-8'))
2732
2733 def test_8bit_in_quopri_body(self):
2734 # This is non-RFC compliant data...without 'decode' the library code
2735 # decodes the body using the charset from the headers, and because the
2736 # source byte really is utf-8 this works. This is likely to fail
2737 # against real dirty data (ie: produce mojibake), but the data is
2738 # invalid anyway so it is as good a guess as any. But this means that
2739 # this test just confirms the current behavior; that behavior is not
2740 # necessarily the best possible behavior. With 'decode' it is
2741 # returning the raw bytes, so that test should be of correct behavior,
2742 # or at least produce the same result that email4 did.
2743 m = self.bodytest_msg.format(charset='utf-8',
2744 cte='quoted-printable',
2745 bodyline='p=C3=B6stál').encode('utf-8')
2746 msg = email.message_from_bytes(m)
2747 self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
2748 self.assertEqual(msg.get_payload(decode=True),
2749 'pöstál\n'.encode('utf-8'))
2750
2751 def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
2752 # This is similar to the previous test, but proves that if the 8bit
2753 # byte is undecodeable in the specified charset, it gets replaced
2754 # by the unicode 'unknown' character. Again, this may or may not
2755 # be the ideal behavior. Note that if decode=False none of the
2756 # decoders will get involved, so this is the only test we need
2757 # for this behavior.
2758 m = self.bodytest_msg.format(charset='ascii',
2759 cte='quoted-printable',
2760 bodyline='p=C3=B6stál').encode('utf-8')
2761 msg = email.message_from_bytes(m)
2762 self.assertEqual(msg.get_payload(), 'p=C3=B6st��l\n')
2763 self.assertEqual(msg.get_payload(decode=True),
2764 'pöstál\n'.encode('utf-8'))
2765
2766 def test_8bit_in_base64_body(self):
2767 # Sticking an 8bit byte in a base64 block makes it undecodable by
2768 # normal means, so the block is returned undecoded, but as bytes.
2769 m = self.bodytest_msg.format(charset='utf-8',
2770 cte='base64',
2771 bodyline='cMO2c3RhbAá=').encode('utf-8')
2772 msg = email.message_from_bytes(m)
2773 self.assertEqual(msg.get_payload(decode=True),
2774 'cMO2c3RhbAá=\n'.encode('utf-8'))
2775
2776 def test_8bit_in_uuencode_body(self):
2777 # Sticking an 8bit byte in a uuencode block makes it undecodable by
2778 # normal means, so the block is returned undecoded, but as bytes.
2779 m = self.bodytest_msg.format(charset='utf-8',
2780 cte='uuencode',
2781 bodyline='<,.V<W1A; á ').encode('utf-8')
2782 msg = email.message_from_bytes(m)
2783 self.assertEqual(msg.get_payload(decode=True),
2784 '<,.V<W1A; á \n'.encode('utf-8'))
2785
2786
2787 headertest_msg = textwrap.dedent("""\
2788 From: foo@bar.com
2789 To: báz
2790 Subject: Maintenant je vous présente mon collègue, le pouf célèbre
2791 \tJean de Baddie
2792 From: göst
2793
2794 Yes, they are flying.
2795 """).encode('utf-8')
2796
2797 def test_get_8bit_header(self):
2798 msg = email.message_from_bytes(self.headertest_msg)
2799 self.assertEqual(msg.get('to'), 'b??z')
2800 self.assertEqual(msg['to'], 'b??z')
2801
2802 def test_print_8bit_headers(self):
2803 msg = email.message_from_bytes(self.headertest_msg)
2804 self.assertEqual(str(msg),
2805 self.headertest_msg.decode(
2806 'ascii', 'replace').replace('�', '?'))
2807
2808 def test_values_with_8bit_headers(self):
2809 msg = email.message_from_bytes(self.headertest_msg)
2810 self.assertListEqual(msg.values(),
2811 ['foo@bar.com',
2812 'b??z',
2813 'Maintenant je vous pr??sente mon '
2814 'coll??gue, le pouf c??l??bre\n'
2815 '\tJean de Baddie',
2816 "g??st"])
2817
2818 def test_items_with_8bit_headers(self):
2819 msg = email.message_from_bytes(self.headertest_msg)
2820 self.assertListEqual(msg.items(),
2821 [('From', 'foo@bar.com'),
2822 ('To', 'b??z'),
2823 ('Subject', 'Maintenant je vous pr??sente mon '
2824 'coll??gue, le pouf c??l??bre\n'
2825 '\tJean de Baddie'),
2826 ('From', 'g??st')])
2827
2828 def test_get_all_with_8bit_headers(self):
2829 msg = email.message_from_bytes(self.headertest_msg)
2830 self.assertListEqual(msg.get_all('from'),
2831 ['foo@bar.com',
2832 'g??st'])
2833
2834 non_latin_bin_msg = textwrap.dedent("""\
2835 From: foo@bar.com
2836 To: báz
2837 Subject: Maintenant je vous présente mon collègue, le pouf célèbre
2838 \tJean de Baddie
2839 Mime-Version: 1.0
2840 Content-Type: text/plain; charset="utf-8"
2841 Content-Transfer-Encoding: 8bit
2842
2843 Да, они летят.
2844 """).encode('utf-8')
2845
2846 def test_bytes_generator(self):
2847 msg = email.message_from_bytes(self.non_latin_bin_msg)
2848 out = BytesIO()
2849 email.generator.BytesGenerator(out).flatten(msg)
2850 self.assertEqual(out.getvalue(), self.non_latin_bin_msg)
2851
2852 # XXX: ultimately the '?' should turn into CTE encoded bytes
2853 # using 'unknown-8bit' charset.
2854 non_latin_bin_msg_as7bit = textwrap.dedent("""\
2855 From: foo@bar.com
2856 To: b??z
2857 Subject: Maintenant je vous pr??sente mon coll??gue, le pouf c??l??bre
2858 \tJean de Baddie
2859 Mime-Version: 1.0
2860 Content-Type: text/plain; charset="utf-8"
2861 Content-Transfer-Encoding: base64
2862
2863 0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
2864 """)
2865
2866 def test_generator_handles_8bit(self):
2867 msg = email.message_from_bytes(self.non_latin_bin_msg)
2868 out = StringIO()
2869 email.generator.Generator(out).flatten(msg)
2870 self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit)
2871
2872 def test_bytes_generator_with_unix_from(self):
2873 # The unixfrom contains a current date, so we can't check it
2874 # literally. Just make sure the first word is 'From' and the
2875 # rest of the message matches the input.
2876 msg = email.message_from_bytes(self.non_latin_bin_msg)
2877 out = BytesIO()
2878 email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
2879 lines = out.getvalue().split(b'\n')
2880 self.assertEqual(lines[0].split()[0], b'From')
2881 self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)
2882
2883 def test_message_from_binary_file(self):
2884 fn = 'test.msg'
2885 self.addCleanup(unlink, fn)
2886 with open(fn, 'wb') as testfile:
2887 testfile.write(self.non_latin_bin_msg)
2888 m = email.parser.BytesParser().parse(open(fn, 'rb'))
2889 self.assertEqual(str(m), self.non_latin_bin_msg_as7bit)
2890
2891 latin_bin_msg = textwrap.dedent("""\
2892 From: foo@bar.com
2893 To: Dinsdale
2894 Subject: Nudge nudge, wink, wink
2895 Mime-Version: 1.0
2896 Content-Type: text/plain; charset="latin-1"
2897 Content-Transfer-Encoding: 8bit
2898
2899 oh là là, know what I mean, know what I mean?
2900 """).encode('latin-1')
2901
2902 latin_bin_msg_as7bit = textwrap.dedent("""\
2903 From: foo@bar.com
2904 To: Dinsdale
2905 Subject: Nudge nudge, wink, wink
2906 Mime-Version: 1.0
2907 Content-Type: text/plain; charset="iso-8859-1"
2908 Content-Transfer-Encoding: quoted-printable
2909
2910 oh l=E0 l=E0, know what I mean, know what I mean?
2911 """)
2912
2913 def test_string_generator_reencodes_to_quopri_when_appropriate(self):
2914 m = email.message_from_bytes(self.latin_bin_msg)
2915 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
2916
2917 def test_decoded_generator_emits_unicode_body(self):
2918 m = email.message_from_bytes(self.latin_bin_msg)
2919 out = StringIO()
2920 email.generator.DecodedGenerator(out).flatten(m)
2921 #DecodedHeader output contains an extra blank line compared
2922 #to the input message. RDM: not sure if this is a bug or not,
2923 #but it is not specific to the 8bit->7bit conversion.
2924 self.assertEqual(out.getvalue(),
2925 self.latin_bin_msg.decode('latin-1')+'\n')
2926
2927 def test_bytes_feedparser(self):
2928 bfp = email.feedparser.BytesFeedParser()
2929 for i in range(0, len(self.latin_bin_msg), 10):
2930 bfp.feed(self.latin_bin_msg[i:i+10])
2931 m = bfp.close()
2932 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
2933
2934
class TestBytesGeneratorIdempotent(TestIdempotent):
    """TestIdempotent variant that parses bytes and flattens with BytesGenerator."""

    # Never truncate the diff printed for a failed comparison.
    maxDiff = None

    def _msgobj(self, filename):
        # Return the parsed message together with the raw bytes it came from.
        with openfile(filename, 'rb') as fp:
            data = fp.read()
        return email.message_from_bytes(data), data

    def _idempotent(self, msg, data):
        # Flatten msg without header wrapping and compare with the input.
        buf = BytesIO()
        email.generator.BytesGenerator(buf, maxheaderlen=0).flatten(msg)
        self.assertEqual(data, buf.getvalue())

    def assertEqual(self, str1, str2):
        # Compare line by line so that a failure shows a per-line diff.
        first_lines = str1.split(b'\n')
        second_lines = str2.split(b'\n')
        self.assertListEqual(first_lines, second_lines)
2953
2954
2955
class TestBase64(unittest.TestCase):
    """Tests for the email.base64mime helpers."""

    def test_len(self):
        check = self.assertEqual
        check(base64mime.header_length('hello'),
              len(base64mime.body_encode(b'hello', eol='')))
        # Every started group of three input characters costs four output
        # characters: 0 -> 0, 1..3 -> 4, 4..6 -> 8, and so on up to 20.
        for size in range(15):
            expected = 4 * ((size + 2) // 3)
            check(base64mime.header_length('x' * size), expected)

    def test_decode(self):
        check = self.assertEqual
        check(base64mime.decode(''), b'')
        check(base64mime.decode('aGVsbG8='), b'hello')

    def test_encode(self):
        check = self.assertEqual
        encode = base64mime.body_encode
        check(encode(b''), b'')
        check(encode(b'hello'), 'aGVsbG8=\n')
        # Test the binary flag
        check(encode(b'hello\n'), 'aGVsbG8K\n')
        # Test the maxlinelen arg
        payload = b'xxxx ' * 20
        check(encode(payload, maxlinelen=40), """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IA==
""")
        # Test the eol argument
        check(encode(payload, maxlinelen=40, eol='\r\n'),
              """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IA==\r
""")

    def test_header_encode(self):
        check = self.assertEqual
        encode = base64mime.header_encode
        check(encode('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        check(encode('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        check(encode('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # Test the charset option
        check(encode('hello', charset='iso-8859-2'),
              '=?iso-8859-2?b?aGVsbG8=?=')
        check(encode('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003006
3007
3008
class TestQuopri(unittest.TestCase):
    """Tests for the email.quoprimime helpers."""

    def setUp(self):
        # Set of characters (as byte integers) that don't need to be encoded
        # in headers.
        letters_and_digits = chain(
            range(ord('a'), ord('z') + 1),
            range(ord('A'), ord('Z') + 1),
            range(ord('0'), ord('9') + 1))
        # Iterating a bytes object yields its byte values as integers.
        self.hlit = list(chain(letters_and_digits, b'!*+-/'))
        # Set of characters (as byte integers) that do need to be encoded in
        # headers.
        self.hnon = [c for c in range(256) if c not in self.hlit]
        assert len(self.hlit) + len(self.hnon) == 256
        # Set of characters (as byte integers) that don't need to be encoded
        # in bodies.
        self.blit = list(range(ord(' '), ord('~') + 1))
        self.blit.append(ord('\t'))
        self.blit.remove(ord('='))
        # Set of characters (as byte integers) that do need to be encoded in
        # bodies.
        self.bnon = [c for c in range(256) if c not in self.blit]
        assert len(self.blit) + len(self.bnon) == 256

    def test_quopri_header_check(self):
        for octet in self.hlit:
            self.assertFalse(
                quoprimime.header_check(octet),
                'Should not be header quopri encoded: %s' % chr(octet))
        for octet in self.hnon:
            self.assertTrue(
                quoprimime.header_check(octet),
                'Should be header quopri encoded: %s' % chr(octet))

    def test_quopri_body_check(self):
        for octet in self.blit:
            self.assertFalse(
                quoprimime.body_check(octet),
                'Should not be body quopri encoded: %s' % chr(octet))
        for octet in self.bnon:
            self.assertTrue(
                quoprimime.body_check(octet),
                'Should be body quopri encoded: %s' % chr(octet))

    def test_header_quopri_len(self):
        check = self.assertEqual
        # RFC 2047 chrome is not included in header_length():
        # =?xxx?q?...?= means 10 extra characters
        chrome = 10
        for sample, length in ((b'hello', 5), (b'h@e@l@l@o@', 20)):
            check(quoprimime.header_length(sample), length)
            check(len(quoprimime.header_encode(sample, charset='xxx')),
                  quoprimime.header_length(sample) + chrome)
        for octet in self.hlit:
            check(quoprimime.header_length(bytes([octet])), 1,
                  'expected length 1 for %r' % chr(octet))
        # Space is special; it's encoded to _
        space = ord(' ')
        for octet in (c for c in self.hnon if c != space):
            check(quoprimime.header_length(bytes([octet])), 3,
                  'expected length 3 for %r' % chr(octet))
        check(quoprimime.header_length(b' '), 1)

    def test_body_quopri_len(self):
        check = self.assertEqual
        for octet in self.blit:
            check(quoprimime.body_length(bytes([octet])), 1)
        for octet in self.bnon:
            check(quoprimime.body_length(bytes([octet])), 3)

    def test_quote_unquote_idempotent(self):
        # quote() followed by unquote() must give back every character.
        for char in map(chr, range(256)):
            quoted = quoprimime.quote(char)
            self.assertEqual(quoprimime.unquote(quoted), char)

    def test_header_encode(self):
        check = self.assertEqual
        encode = quoprimime.header_encode
        check(encode(b'hello'), '=?iso-8859-1?q?hello?=')
        check(encode(b'hello', charset='iso-8859-2'),
              '=?iso-8859-2?q?hello?=')
        check(encode(b'hello\nworld'), '=?iso-8859-1?q?hello=0Aworld?=')
        # Test a non-ASCII character
        check(encode(b'hello\xc7there'), '=?iso-8859-1?q?hello=C7there?=')

    def test_decode(self):
        check = self.assertEqual
        decode = quoprimime.decode
        check(decode(''), '')
        check(decode('hello'), 'hello')
        check(decode('hello', 'X'), 'hello')
        check(decode('hello\nworld', 'X'), 'helloXworld')

    def test_encode(self):
        check = self.assertEqual
        encode = quoprimime.body_encode
        check(encode(''), '')
        check(encode('hello'), 'hello')
        # Test the binary flag
        check(encode('hello\r\nworld'), 'hello\nworld')
        # Test the maxlinelen arg
        text = 'xxxx ' * 20
        check(encode(text, maxlinelen=40), """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
x xxxx xxxx xxxx xxxx=20""")
        # Test the eol argument
        check(encode(text, maxlinelen=40, eol='\r\n'),
              """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
x xxxx xxxx xxxx xxxx=20""")
        check(encode("""\
one line

two line"""), """\
one line

two line""")
3125
3126
3127
3128# Test the Charset class
3129class TestCharset(unittest.TestCase):
3130 def tearDown(self):
3131 from email import charset as CharsetModule
3132 try:
3133 del CharsetModule.CHARSETS['fake']
3134 except KeyError:
3135 pass
3136
Guido van Rossum9604e662007-08-30 03:46:43 +00003137 def test_codec_encodeable(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003138 eq = self.assertEqual
3139 # Make sure us-ascii = no Unicode conversion
3140 c = Charset('us-ascii')
Guido van Rossum9604e662007-08-30 03:46:43 +00003141 eq(c.header_encode('Hello World!'), 'Hello World!')
3142 # Test 8-bit idempotency with us-ascii
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003143 s = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
Guido van Rossum9604e662007-08-30 03:46:43 +00003144 self.assertRaises(UnicodeError, c.header_encode, s)
3145 c = Charset('utf-8')
3146 eq(c.header_encode(s), '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003147
3148 def test_body_encode(self):
3149 eq = self.assertEqual
3150 # Try a charset with QP body encoding
3151 c = Charset('iso-8859-1')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00003152 eq('hello w=F6rld', c.body_encode('hello w\xf6rld'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003153 # Try a charset with Base64 body encoding
3154 c = Charset('utf-8')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00003155 eq('aGVsbG8gd29ybGQ=\n', c.body_encode(b'hello world'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003156 # Try a charset with None body encoding
3157 c = Charset('us-ascii')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00003158 eq('hello world', c.body_encode('hello world'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003159 # Try the convert argument, where input codec != output codec
3160 c = Charset('euc-jp')
3161 # With apologies to Tokio Kikuchi ;)
Barry Warsawbef9d212007-08-31 10:55:37 +00003162 # XXX FIXME
3163## try:
3164## eq('\x1b$B5FCO;~IW\x1b(B',
3165## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
3166## eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
3167## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
3168## except LookupError:
3169## # We probably don't have the Japanese codecs installed
3170## pass
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003171 # Testing SF bug #625509, which we have to fake, since there are no
3172 # built-in encodings where the header encoding is QP but the body
3173 # encoding is not.
3174 from email import charset as CharsetModule
3175 CharsetModule.add_charset('fake', CharsetModule.QP, None)
3176 c = Charset('fake')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00003177 eq('hello w\xf6rld', c.body_encode('hello w\xf6rld'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003178
3179 def test_unicode_charset_name(self):
3180 charset = Charset('us-ascii')
3181 self.assertEqual(str(charset), 'us-ascii')
3182 self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
3183
3184
3185
3186# Test multilingual MIME headers.
3187class TestHeader(TestEmailBase):
3188 def test_simple(self):
3189 eq = self.ndiffAssertEqual
3190 h = Header('Hello World!')
3191 eq(h.encode(), 'Hello World!')
3192 h.append(' Goodbye World!')
3193 eq(h.encode(), 'Hello World! Goodbye World!')
3194
3195 def test_simple_surprise(self):
3196 eq = self.ndiffAssertEqual
3197 h = Header('Hello World!')
3198 eq(h.encode(), 'Hello World!')
3199 h.append('Goodbye World!')
3200 eq(h.encode(), 'Hello World! Goodbye World!')
3201
3202 def test_header_needs_no_decoding(self):
3203 h = 'no decoding needed'
3204 self.assertEqual(decode_header(h), [(h, None)])
3205
3206 def test_long(self):
3207 h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
3208 maxlinelen=76)
3209 for l in h.encode(splitchars=' ').split('\n '):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003210 self.assertTrue(len(l) <= 76)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003211
3212 def test_multilingual(self):
3213 eq = self.ndiffAssertEqual
3214 g = Charset("iso-8859-1")
3215 cz = Charset("iso-8859-2")
3216 utf8 = Charset("utf-8")
3217 g_head = (b'Die Mieter treten hier ein werden mit einem '
3218 b'Foerderband komfortabel den Korridor entlang, '
3219 b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
3220 b'gegen die rotierenden Klingen bef\xf6rdert. ')
3221 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
3222 b'd\xf9vtipu.. ')
3223 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
3224 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
3225 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
3226 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
3227 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
3228 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
3229 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
3230 '\u3044\u307e\u3059\u3002')
3231 h = Header(g_head, g)
3232 h.append(cz_head, cz)
3233 h.append(utf8_head, utf8)
Guido van Rossum9604e662007-08-30 03:46:43 +00003234 enc = h.encode(maxlinelen=76)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003235 eq(enc, """\
Guido van Rossum9604e662007-08-30 03:46:43 +00003236=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
3237 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
3238 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
3239 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003240 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
3241 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
3242 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
3243 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
Guido van Rossum9604e662007-08-30 03:46:43 +00003244 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
3245 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
3246 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
3247 decoded = decode_header(enc)
3248 eq(len(decoded), 3)
3249 eq(decoded[0], (g_head, 'iso-8859-1'))
3250 eq(decoded[1], (cz_head, 'iso-8859-2'))
3251 eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003252 ustr = str(h)
Guido van Rossum9604e662007-08-30 03:46:43 +00003253 eq(ustr,
3254 (b'Die Mieter treten hier ein werden mit einem Foerderband '
3255 b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
3256 b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
3257 b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
3258 b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
3259 b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
3260 b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
3261 b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
3262 b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
3263 b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
3264 b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
3265 b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
3266 b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
3267 b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
3268 b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
3269 b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
3270 ).decode('utf-8'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003271 # Test make_header()
3272 newh = make_header(decode_header(enc))
Guido van Rossum9604e662007-08-30 03:46:43 +00003273 eq(newh, h)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003274
3275 def test_empty_header_encode(self):
3276 h = Header()
3277 self.assertEqual(h.encode(), '')
Barry Warsaw8b3d6592007-08-30 02:10:49 +00003278
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003279 def test_header_ctor_default_args(self):
3280 eq = self.ndiffAssertEqual
3281 h = Header()
3282 eq(h, '')
3283 h.append('foo', Charset('iso-8859-1'))
Guido van Rossum9604e662007-08-30 03:46:43 +00003284 eq(h, 'foo')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003285
3286 def test_explicit_maxlinelen(self):
3287 eq = self.ndiffAssertEqual
3288 hstr = ('A very long line that must get split to something other '
3289 'than at the 76th character boundary to test the non-default '
3290 'behavior')
3291 h = Header(hstr)
3292 eq(h.encode(), '''\
3293A very long line that must get split to something other than at the 76th
3294 character boundary to test the non-default behavior''')
3295 eq(str(h), hstr)
3296 h = Header(hstr, header_name='Subject')
3297 eq(h.encode(), '''\
3298A very long line that must get split to something other than at the
3299 76th character boundary to test the non-default behavior''')
3300 eq(str(h), hstr)
3301 h = Header(hstr, maxlinelen=1024, header_name='Subject')
3302 eq(h.encode(), hstr)
3303 eq(str(h), hstr)
3304
Guido van Rossum9604e662007-08-30 03:46:43 +00003305 def test_quopri_splittable(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003306 eq = self.ndiffAssertEqual
3307 h = Header(charset='iso-8859-1', maxlinelen=20)
Guido van Rossum9604e662007-08-30 03:46:43 +00003308 x = 'xxxx ' * 20
3309 h.append(x)
3310 s = h.encode()
3311 eq(s, """\
3312=?iso-8859-1?q?xxx?=
3313 =?iso-8859-1?q?x_?=
3314 =?iso-8859-1?q?xx?=
3315 =?iso-8859-1?q?xx?=
3316 =?iso-8859-1?q?_x?=
3317 =?iso-8859-1?q?xx?=
3318 =?iso-8859-1?q?x_?=
3319 =?iso-8859-1?q?xx?=
3320 =?iso-8859-1?q?xx?=
3321 =?iso-8859-1?q?_x?=
3322 =?iso-8859-1?q?xx?=
3323 =?iso-8859-1?q?x_?=
3324 =?iso-8859-1?q?xx?=
3325 =?iso-8859-1?q?xx?=
3326 =?iso-8859-1?q?_x?=
3327 =?iso-8859-1?q?xx?=
3328 =?iso-8859-1?q?x_?=
3329 =?iso-8859-1?q?xx?=
3330 =?iso-8859-1?q?xx?=
3331 =?iso-8859-1?q?_x?=
3332 =?iso-8859-1?q?xx?=
3333 =?iso-8859-1?q?x_?=
3334 =?iso-8859-1?q?xx?=
3335 =?iso-8859-1?q?xx?=
3336 =?iso-8859-1?q?_x?=
3337 =?iso-8859-1?q?xx?=
3338 =?iso-8859-1?q?x_?=
3339 =?iso-8859-1?q?xx?=
3340 =?iso-8859-1?q?xx?=
3341 =?iso-8859-1?q?_x?=
3342 =?iso-8859-1?q?xx?=
3343 =?iso-8859-1?q?x_?=
3344 =?iso-8859-1?q?xx?=
3345 =?iso-8859-1?q?xx?=
3346 =?iso-8859-1?q?_x?=
3347 =?iso-8859-1?q?xx?=
3348 =?iso-8859-1?q?x_?=
3349 =?iso-8859-1?q?xx?=
3350 =?iso-8859-1?q?xx?=
3351 =?iso-8859-1?q?_x?=
3352 =?iso-8859-1?q?xx?=
3353 =?iso-8859-1?q?x_?=
3354 =?iso-8859-1?q?xx?=
3355 =?iso-8859-1?q?xx?=
3356 =?iso-8859-1?q?_x?=
3357 =?iso-8859-1?q?xx?=
3358 =?iso-8859-1?q?x_?=
3359 =?iso-8859-1?q?xx?=
3360 =?iso-8859-1?q?xx?=
3361 =?iso-8859-1?q?_?=""")
3362 eq(x, str(make_header(decode_header(s))))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003363 h = Header(charset='iso-8859-1', maxlinelen=40)
3364 h.append('xxxx ' * 20)
Guido van Rossum9604e662007-08-30 03:46:43 +00003365 s = h.encode()
3366 eq(s, """\
3367=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
3368 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
3369 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
3370 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
3371 =?iso-8859-1?q?_xxxx_xxxx_?=""")
3372 eq(x, str(make_header(decode_header(s))))
3373
3374 def test_base64_splittable(self):
3375 eq = self.ndiffAssertEqual
3376 h = Header(charset='koi8-r', maxlinelen=20)
3377 x = 'xxxx ' * 20
3378 h.append(x)
3379 s = h.encode()
3380 eq(s, """\
3381=?koi8-r?b?eHh4?=
3382 =?koi8-r?b?eCB4?=
3383 =?koi8-r?b?eHh4?=
3384 =?koi8-r?b?IHh4?=
3385 =?koi8-r?b?eHgg?=
3386 =?koi8-r?b?eHh4?=
3387 =?koi8-r?b?eCB4?=
3388 =?koi8-r?b?eHh4?=
3389 =?koi8-r?b?IHh4?=
3390 =?koi8-r?b?eHgg?=
3391 =?koi8-r?b?eHh4?=
3392 =?koi8-r?b?eCB4?=
3393 =?koi8-r?b?eHh4?=
3394 =?koi8-r?b?IHh4?=
3395 =?koi8-r?b?eHgg?=
3396 =?koi8-r?b?eHh4?=
3397 =?koi8-r?b?eCB4?=
3398 =?koi8-r?b?eHh4?=
3399 =?koi8-r?b?IHh4?=
3400 =?koi8-r?b?eHgg?=
3401 =?koi8-r?b?eHh4?=
3402 =?koi8-r?b?eCB4?=
3403 =?koi8-r?b?eHh4?=
3404 =?koi8-r?b?IHh4?=
3405 =?koi8-r?b?eHgg?=
3406 =?koi8-r?b?eHh4?=
3407 =?koi8-r?b?eCB4?=
3408 =?koi8-r?b?eHh4?=
3409 =?koi8-r?b?IHh4?=
3410 =?koi8-r?b?eHgg?=
3411 =?koi8-r?b?eHh4?=
3412 =?koi8-r?b?eCB4?=
3413 =?koi8-r?b?eHh4?=
3414 =?koi8-r?b?IA==?=""")
3415 eq(x, str(make_header(decode_header(s))))
3416 h = Header(charset='koi8-r', maxlinelen=40)
3417 h.append(x)
3418 s = h.encode()
3419 eq(s, """\
3420=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
3421 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
3422 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
3423 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
3424 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
3425 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
3426 eq(x, str(make_header(decode_header(s))))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003427
3428 def test_us_ascii_header(self):
3429 eq = self.assertEqual
3430 s = 'hello'
3431 x = decode_header(s)
3432 eq(x, [('hello', None)])
3433 h = make_header(x)
3434 eq(s, h.encode())
3435
3436 def test_string_charset(self):
3437 eq = self.assertEqual
3438 h = Header()
3439 h.append('hello', 'iso-8859-1')
Guido van Rossum9604e662007-08-30 03:46:43 +00003440 eq(h, 'hello')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003441
3442## def test_unicode_error(self):
3443## raises = self.assertRaises
3444## raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
3445## raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
3446## h = Header()
3447## raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
3448## raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
3449## raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')
3450
3451 def test_utf8_shortest(self):
3452 eq = self.assertEqual
3453 h = Header('p\xf6stal', 'utf-8')
3454 eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
3455 h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
3456 eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')
3457
3458 def test_bad_8bit_header(self):
3459 raises = self.assertRaises
3460 eq = self.assertEqual
3461 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
3462 raises(UnicodeError, Header, x)
3463 h = Header()
3464 raises(UnicodeError, h.append, x)
3465 e = x.decode('utf-8', 'replace')
3466 eq(str(Header(x, errors='replace')), e)
3467 h.append(x, errors='replace')
3468 eq(str(h), e)
3469
3470 def test_encoded_adjacent_nonencoded(self):
3471 eq = self.assertEqual
3472 h = Header()
3473 h.append('hello', 'iso-8859-1')
3474 h.append('world')
3475 s = h.encode()
3476 eq(s, '=?iso-8859-1?q?hello?= world')
3477 h = make_header(decode_header(s))
3478 eq(h.encode(), s)
3479
3480 def test_whitespace_eater(self):
3481 eq = self.assertEqual
3482 s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
3483 parts = decode_header(s)
3484 eq(parts, [(b'Subject:', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b'zz.', None)])
3485 hdr = make_header(parts)
3486 eq(hdr.encode(),
3487 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')
3488
3489 def test_broken_base64_header(self):
3490 raises = self.assertRaises
R. David Murrayc4e69cc2010-08-03 22:14:10 +00003491 s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003492 raises(errors.HeaderParseError, decode_header, s)
3493
3494
3495
3496# Test RFC 2231 header parameters (en/de)coding
class TestRFC2231(TestEmailBase):
    """Tests for RFC 2231 parameter encoding and decoding on messages."""

    def test_get_param(self):
        """get_param() returns a (charset, language, value) triple."""
        message = self._msgobj('msg_29.txt')
        self.assertEqual(
            message.get_param('title'),
            ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        self.assertEqual(
            message.get_param('title', unquote=False),
            ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))

    def test_set_param(self):
        """set_param() with a charset stores an RFC 2231 encoded value."""
        check = self.ndiffAssertEqual
        title = 'This is even more ***fun*** isn\'t it!'
        message = Message()
        message.set_param('title', title, charset='us-ascii')
        check(message.get_param('title'), ('us-ascii', '', title))
        message.set_param('title', title, charset='us-ascii', language='en')
        check(message.get_param('title'), ('us-ascii', 'en', title))
        message = self._msgobj('msg_01.txt')
        message.set_param('title', title, charset='us-ascii', language='en')
        check(message.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*="us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21"


Hi,

Do you like this message?

-Me
""")

    def test_del_param(self):
        """del_param() removes one parameter and keeps the others."""
        check = self.ndiffAssertEqual
        message = self._msgobj('msg_01.txt')
        message.set_param('foo', 'bar', charset='us-ascii', language='en')
        message.set_param('title', 'This is even more ***fun*** isn\'t it!',
                          charset='us-ascii', language='en')
        message.del_param('foo', header='Content-Type')
        check(message.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*="us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21"


Hi,

Do you like this message?

-Me
""")

    def test_rfc2231_get_content_charset(self):
        """get_content_charset() is 'us-ascii' for msg_32.txt."""
        message = self._msgobj('msg_32.txt')
        self.assertEqual(message.get_content_charset(), 'us-ascii')

    def test_rfc2231_no_language_or_charset(self):
        """Unencoded continuations are joined into a plain string."""
        raw = '''\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm

'''
        message = email.message_from_string(raw)
        value = message.get_param('NAME')
        self.assertFalse(isinstance(value, tuple))
        self.assertEqual(
            value,
            'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')

    def test_rfc2231_no_language_or_charset_in_filename(self):
        """A filename with empty charset and language is still decoded."""
        raw = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        message = email.message_from_string(raw)
        self.assertEqual(message.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
        """Same input and expectation as the preceding filename test."""
        raw = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        message = email.message_from_string(raw)
        self.assertEqual(message.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_partly_encoded(self):
        """Only the segment marked with a trailing '*' is percent-decoded."""
        raw = '''\
Content-Disposition: inline;
\tfilename*0="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        message = email.message_from_string(raw)
        self.assertEqual(
            message.get_filename(),
            'This%20is%20even%20more%20***fun*** is it not.pdf')

    def test_rfc2231_partly_nonencoded(self):
        """Segments without a trailing '*' keep their percent escapes."""
        raw = '''\
Content-Disposition: inline;
\tfilename*0="This%20is%20even%20more%20";
\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        message = email.message_from_string(raw)
        self.assertEqual(
            message.get_filename(),
            'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_boundary(self):
        """A boundary with empty charset and language is still decoded."""
        raw = '''\
Content-Type: multipart/alternative;
\tboundary*0*="''This%20is%20even%20more%20";
\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tboundary*2="is it not.pdf"

'''
        message = email.message_from_string(raw)
        self.assertEqual(message.get_boundary(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_charset(self):
        """A segmented charset value is decoded and comes back lowercased."""
        # This is a nonsensical charset value, but tests the code anyway
        raw = '''\
Content-Type: text/plain;
\tcharset*0*="This%20is%20even%20more%20";
\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tcharset*2="is it not.pdf"

'''
        message = email.message_from_string(raw)
        self.assertEqual(message.get_content_charset(),
                         'this is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_filename(self):
        """An unknown charset name does not prevent decoding the filename."""
        raw = '''\
Content-Disposition: inline;
\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        message = email.message_from_string(raw)
        self.assertEqual(message.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_charset(self):
        """A charset value with a bogus encoding yields None."""
        raw = """\
Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D

"""
        message = email.message_from_string(raw)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertEqual(message.get_content_charset(), None)

    def test_rfc2231_bad_character_in_charset(self):
        """A charset value holding non-ascii characters yields None."""
        raw = """\
Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D

"""
        message = email.message_from_string(raw)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertEqual(message.get_content_charset(), None)

    def test_rfc2231_bad_character_in_filename(self):
        """An undecodable byte in the filename becomes U+FFFD."""
        raw = '''\
Content-Disposition: inline;
\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2*="is it not.pdf%E2"

'''
        message = email.message_from_string(raw)
        self.assertEqual(message.get_filename(),
                         'This is even more ***fun*** is it not.pdf\ufffd')

    def test_rfc2231_unknown_encoding(self):
        """An X-UNKNOWN charset still yields the plain filename."""
        raw = """\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt

"""
        message = email.message_from_string(raw)
        self.assertEqual(message.get_filename(), 'myfile.txt')

    def test_rfc2231_single_tick_in_filename_extended(self):
        """One apostrophe in an extended value is not a charset delimiter."""
        raw = """\
Content-Type: application/x-foo;
\tname*0*=\"Frank's\"; name*1*=\" Document\"

"""
        message = email.message_from_string(raw)
        charset, language, value = message.get_param('name')
        self.assertEqual(charset, None)
        self.assertEqual(language, None)
        self.assertEqual(value, "Frank's Document")

    def test_rfc2231_single_tick_in_filename(self):
        """One apostrophe in a non-extended value gives a plain string."""
        raw = """\
Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"

"""
        message = email.message_from_string(raw)
        value = message.get_param('name')
        self.assertFalse(isinstance(value, tuple))
        self.assertEqual(value, "Frank's Document")

    def test_rfc2231_tick_attack_extended(self):
        """Only the first two apostrophes of an extended value delimit
        the charset and language."""
        raw = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"

"""
        message = email.message_from_string(raw)
        charset, language, value = message.get_param('name')
        self.assertEqual(charset, 'us-ascii')
        self.assertEqual(language, 'en-us')
        self.assertEqual(value, "Frank's Document")

    def test_rfc2231_tick_attack(self):
        """Apostrophes in a non-extended value are kept literally."""
        raw = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"

"""
        message = email.message_from_string(raw)
        value = message.get_param('name')
        self.assertFalse(isinstance(value, tuple))
        self.assertEqual(value, "us-ascii'en-us'Frank's Document")

    def test_rfc2231_no_extended_values(self):
        """An ordinary quoted parameter is returned as a plain string."""
        raw = """\
Content-Type: application/x-foo; name=\"Frank's Document\"

"""
        message = email.message_from_string(raw)
        self.assertEqual(message.get_param('name'), "Frank's Document")

    def test_rfc2231_encoded_then_unencoded_segments(self):
        """An encoded first segment followed by a mixed tail is joined."""
        raw = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'My\";
\tname*1=\" Document\";
\tname*2*=\" For You\"

"""
        message = email.message_from_string(raw)
        charset, language, value = message.get_param('name')
        self.assertEqual(charset, 'us-ascii')
        self.assertEqual(language, 'en-us')
        self.assertEqual(value, 'My Document For You')

    def test_rfc2231_unencoded_then_encoded_segments(self):
        """An unencoded first segment followed by encoded ones is joined."""
        raw = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'My\";
\tname*1*=\" Document\";
\tname*2*=\" For You\"

"""
        message = email.message_from_string(raw)
        charset, language, value = message.get_param('name')
        self.assertEqual(charset, 'us-ascii')
        self.assertEqual(language, 'en-us')
        self.assertEqual(value, 'My Document For You')
3805
3806
3807
R. David Murraya8f480f2010-01-16 18:30:03 +00003808# Tests to ensure that signed parts of an email are completely preserved, as
3809# required by RFC1847 section 2.1. Note that these are incomplete, because the
3810# email package does not currently always preserve the body. See issue 1670765.
class TestSigned(TestEmailBase):
    """Check that the first MIME part of msg_45.txt is unchanged after
    the message is parsed and written out again."""

    def _msg_and_obj(self, filename):
        """Return the raw text of *filename* and the Message parsed from it."""
        with openfile(findfile(filename)) as stream:
            raw = stream.read()
        return raw, email.message_from_string(raw)

    def _signed_parts_eq(self, original, result):
        """Assert that both texts contain the same first MIME part."""
        import re
        # Group 2 is the text between a '--boundary' line and the next
        # line that repeats the same '--boundary'.
        first_part = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)
        expected = first_part.search(original).group(2)
        actual = first_part.search(result).group(2)
        self.assertEqual(actual, expected)

    def test_long_headers_as_string(self):
        """as_string() with default arguments keeps the first part intact."""
        raw, message = self._msg_and_obj('msg_45.txt')
        self._signed_parts_eq(raw, message.as_string())

    def test_long_headers_as_string_maxheaderlen(self):
        """as_string(maxheaderlen=60) keeps the first part intact."""
        raw, message = self._msg_and_obj('msg_45.txt')
        self._signed_parts_eq(raw, message.as_string(maxheaderlen=60))

    def test_long_headers_flatten(self):
        """Generator.flatten() keeps the first part intact."""
        raw, message = self._msg_and_obj('msg_45.txt')
        sink = StringIO()
        Generator(sink).flatten(message)
        self._signed_parts_eq(raw, sink.getvalue())
3843
3844
3845
def _testclasses():
    """Return every attribute of this module whose name starts with 'Test'.

    The result follows the order of dir(), i.e. sorted by name.
    """
    module = sys.modules[__name__]
    found = []
    for attribute in dir(module):
        if attribute.startswith('Test'):
            found.append(getattr(module, attribute))
    return found
3849
3850
def suite():
    """Return a TestSuite holding the tests of every ``Test*`` class.

    The classes come from _testclasses().  Tests are collected with
    TestLoader.loadTestsFromTestCase() rather than unittest.makeSuite(),
    which was deprecated in Python 3.11 and removed in 3.13.
    """
    loader = unittest.TestLoader()
    # Named all_tests so the local does not shadow this function's name.
    all_tests = unittest.TestSuite()
    for testclass in _testclasses():
        all_tests.addTest(loader.loadTestsFromTestCase(testclass))
    return all_tests
3856
3857
def test_main():
    """Pass each class returned by _testclasses() to run_unittest()."""
    discovered = _testclasses()
    for case_class in discovered:
        run_unittest(case_class)
3861
3862
3863
# When run as a script, hand control to unittest and let it build the
# tests by calling the module-level suite() function.
if __name__ == '__main__':
    unittest.main(defaultTest='suite')