blob: 51fb2295a64a041cbc3cab0339bfe2cff66ceb9d [file] [log] [blame]
# Copyright (C) 2001-2010 Python Software Foundation
# Contact: email-sig@python.org
# email package unit tests
4
5import os
R. David Murray719a4492010-11-21 16:53:48 +00006import re
Guido van Rossum8b3febe2007-08-30 01:15:14 +00007import sys
8import time
9import base64
10import difflib
11import unittest
12import warnings
R. David Murray96fd54e2010-10-08 15:55:28 +000013import textwrap
Guido van Rossum8b3febe2007-08-30 01:15:14 +000014
R. David Murray96fd54e2010-10-08 15:55:28 +000015from io import StringIO, BytesIO
Guido van Rossum8b3febe2007-08-30 01:15:14 +000016from itertools import chain
17
18import email
19
20from email.charset import Charset
21from email.header import Header, decode_header, make_header
22from email.parser import Parser, HeaderParser
23from email.generator import Generator, DecodedGenerator
24from email.message import Message
25from email.mime.application import MIMEApplication
26from email.mime.audio import MIMEAudio
27from email.mime.text import MIMEText
28from email.mime.image import MIMEImage
29from email.mime.base import MIMEBase
30from email.mime.message import MIMEMessage
31from email.mime.multipart import MIMEMultipart
32from email import utils
33from email import errors
34from email import encoders
35from email import iterators
36from email import base64mime
37from email import quoprimime
38
R. David Murray96fd54e2010-10-08 15:55:28 +000039from test.support import findfile, run_unittest, unlink
Guido van Rossum8b3febe2007-08-30 01:15:14 +000040from email.test import __file__ as landmark
41
42
43NL = '\n'
44EMPTYSTRING = ''
45SPACE = ' '
46
47
Ezio Melottib3aedd42010-11-20 19:04:17 +000048
def openfile(filename, *args, **kws):
    """Open *filename* relative to the 'data' directory of the test package.

    The directory is located next to ``landmark`` (the ``__file__`` of
    ``email.test``).  Any extra positional or keyword arguments are passed
    straight through to the built-in ``open()``.
    """
    data_dir = os.path.join(os.path.dirname(landmark), 'data')
    return open(os.path.join(data_dir, filename), *args, **kws)
52
53
Ezio Melottib3aedd42010-11-20 19:04:17 +000054
# Base test class
class TestEmailBase(unittest.TestCase):
    """Shared helpers for the email package test cases."""

    def ndiffAssertEqual(self, first, second):
        """Like assertEqual, but report a mismatch as a line-by-line ndiff."""
        if first != second:
            # Compare the repr() of each line so that whitespace and escape
            # differences are visible in the failure output.
            left_lines, right_lines = (
                [repr(line) for line in str(text).splitlines()]
                for text in (first, second)
            )
            delta = difflib.ndiff(left_lines, right_lines)
            raise self.failureException(NL + NL.join(delta))

    def _msgobj(self, filename):
        """Parse the sample file *filename* and return the message object.

        The file is opened through openfile(findfile(filename)) and handed
        to email.message_from_file().
        """
        with openfile(findfile(filename)) as sample:
            message = email.message_from_file(sample)
        return message
70
71
Ezio Melottib3aedd42010-11-20 19:04:17 +000072
# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    # Exercises the public API of email.message.Message: header access,
    # charset handling, payload decoding and MIME parameter manipulation.
    # Tests that name a 'msg_NN.txt' file load it through self._msgobj().

    def test_get_all(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        self.assertIsNone(msg.get_charset())
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        self.assertIsNone(msg.get_charset())
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_get_charsets(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_make_boundary(self):
        msg = MIMEMultipart('form-data')
        # Note that when the boundary gets created is an implementation
        # detail and might change.
        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
        # Trigger creation of boundary
        msg.as_string()
        self.assertEqual(msg.items()[0][1][:33],
                         'multipart/form-data; boundary="==')
        # XXX: there ought to be tests of the uniqueness of the boundary, too.

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        with openfile(findfile('msg_46.txt')) as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, True, 0)
        gen.flatten(msg, False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        self.assertIsNone(msg.get_payload(decode=True))
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            # Assigning a header appends instead of replacing, and lookups
            # return the first match.  Drop the previous value so each
            # encoding name is really the one being exercised.
            del msg['content-transfer-encoding']
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_get_payload_n_raises_on_non_multipart(self):
        msg = Message()
        self.assertRaises(TypeError, msg.get_payload, 1)

    def test_decoded_generator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        for name in ('from', 'From', 'FROM', 'to', 'To', 'TO'):
            self.assertIn(name, msg)

    def test_as_string(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        eq(text, str(msg))
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        self.assertTrue(lines[0].startswith('From '))
        eq(text, NL.join(lines[1:]))

    def test_bad_param(self):
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_filename())

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_boundary())

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        self.assertIsNone(msg.get_params())
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    def test_get_param_liberal(self):
        msg = Message()
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        self.assertIsNone(msg.get_param('quuz', header='x-header'))
        self.assertIsNone(msg.get_param('quuz'))
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    def test_get_param_with_semis_in_quotes(self):
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    def test_get_param_with_quotes(self):
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        msg = email.message_from_string('Header: exists')
        for name in ('header', 'Header', 'HEADER'):
            self.assertIn(name, msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_del_param_on_nonexistent_header(self):
        # Only checks that deleting from an absent header does not raise.
        msg = Message()
        msg.del_param('filename', 'content-disposition')

    def test_del_nonexistent_param(self):
        msg = Message()
        msg.add_header('Content-Type', 'text/plain', charset='utf-8')
        existing_header = msg['Content-Type']
        msg.del_param('foobar', header='Content-Type')
        # Deleting an absent parameter must leave the header untouched.
        self.assertEqual(msg['Content-Type'], existing_header)
        self.assertEqual(msg['Content-Type'], 'text/plain; charset="utf-8"')

    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    def test_broken_base64_payload(self):
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    def test_broken_unicode_payload(self):
        # This test improves coverage but is not a compliance test.
        # The behavior in this situation is currently undefined by the API.
        x = 'this is a br\xf6ken thing to do'
        msg = Message()
        msg['content-type'] = 'text/plain'
        msg['content-transfer-encoding'] = '8bit'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    def test_questionable_bytes_payload(self):
        # This test improves coverage but is not a compliance test,
        # since it involves poking inside the black box.
        x = 'this is a quéstionable thing to do'.encode('utf-8')
        msg = Message()
        msg['content-type'] = 'text/plain; charset="utf-8"'
        msg['content-transfer-encoding'] = '8bit'
        msg._payload = x
        self.assertEqual(msg.get_payload(decode=True), x)

    # Issue 1078919
    def test_ascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    def test_noascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer.ppt")
        self.assertEqual(
            "attachment; filename*=utf-8''Fu%C3%9Fballer.ppt",
            msg['Content-Disposition'])

    def test_nonascii_add_header_via_triple(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fußballer.ppt'))
        self.assertEqual(
            "attachment; filename*=iso-8859-1''Fu%DFballer.ppt",
            msg['Content-Disposition'])

    def test_ascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="windows [filename].ppt")
        self.assertEqual(
            'attachment; filename="windows [filename].ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer [filename].ppt")
        self.assertEqual(
            "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
            msg['Content-Disposition'])

    def test_add_header_with_name_only_param(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'inline', foo_bar=None)
        self.assertEqual("inline; foo-bar", msg['Content-Disposition'])

    def test_add_header_with_no_value(self):
        msg = Message()
        msg.add_header('X-Status', None)
        self.assertEqual('', msg['X-Status'])

    # Issue 5871: reject an attempt to embed a header inside a header value
    # (header injection attack).
    def test_embeded_header_via_Header_rejected(self):
        msg = Message()
        msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_embeded_header_via_string_rejected(self):
        msg = Message()
        msg['Dummy'] = 'dummy\nX-Injected-Header: test'
        self.assertRaises(errors.HeaderParseError, msg.as_string)
621
# Test the email.encoders module
class TestEncoders(unittest.TestCase):
    # Checks the Content-Transfer-Encoding chosen for various payloads and
    # charsets, and the line length of base64-encoded bodies.

    def test_EncodersEncode_base64(self):
        with openfile('PyBanner048.gif', 'rb') as fp:
            bindata = fp.read()
        mimed = MIMEImage(bindata)
        base64ed = mimed.get_payload()
        # the transfer-encoded body lines should all be <=76 characters
        lines = base64ed.split('\n')
        self.assertLessEqual(max(len(x) for x in lines), 76)

    def test_encode_empty_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg['content-transfer-encoding'], '7bit')

    def test_default_cte(self):
        eq = self.assertEqual
        # 7bit data and the default us-ascii _charset
        msg = MIMEText('hello world')
        eq(msg['content-transfer-encoding'], '7bit')
        # Similar, but with 8bit data
        msg = MIMEText('hello \xf8 world')
        eq(msg['content-transfer-encoding'], '8bit')
        # And now with a different charset
        msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')

    def test_encode7or8bit(self):
        # Make sure a charset whose input character set is 8bit but
        # whose output character set is 7bit gets a transfer-encoding
        # of 7bit.
        eq = self.assertEqual
        msg = MIMEText('文', _charset='euc-jp')
        eq(msg['content-transfer-encoding'], '7bit')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000659
Ezio Melottib3aedd42010-11-20 19:04:17 +0000660
# Test long header wrapping
662class TestLongHeaders(TestEmailBase):
663 def test_split_long_continuation(self):
664 eq = self.ndiffAssertEqual
665 msg = email.message_from_string("""\
666Subject: bug demonstration
667\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
668\tmore text
669
670test
671""")
672 sfp = StringIO()
673 g = Generator(sfp)
674 g.flatten(msg)
675 eq(sfp.getvalue(), """\
676Subject: bug demonstration
677\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
678\tmore text
679
680test
681""")
682
683 def test_another_long_almost_unsplittable_header(self):
684 eq = self.ndiffAssertEqual
685 hstr = """\
686bug demonstration
687\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
688\tmore text"""
689 h = Header(hstr, continuation_ws='\t')
690 eq(h.encode(), """\
691bug demonstration
692\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
693\tmore text""")
694 h = Header(hstr.replace('\t', ' '))
695 eq(h.encode(), """\
696bug demonstration
697 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
698 more text""")
699
700 def test_long_nonstring(self):
701 eq = self.ndiffAssertEqual
702 g = Charset("iso-8859-1")
703 cz = Charset("iso-8859-2")
704 utf8 = Charset("utf-8")
705 g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
706 b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
707 b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
708 b'bef\xf6rdert. ')
709 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
710 b'd\xf9vtipu.. ')
711 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
712 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
713 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
714 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
715 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
716 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
717 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
718 '\u3044\u307e\u3059\u3002')
719 h = Header(g_head, g, header_name='Subject')
720 h.append(cz_head, cz)
721 h.append(utf8_head, utf8)
722 msg = Message()
723 msg['Subject'] = h
724 sfp = StringIO()
725 g = Generator(sfp)
726 g.flatten(msg)
727 eq(sfp.getvalue(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000728Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
729 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
730 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
731 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
732 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
733 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
734 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
735 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
736 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
737 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
738 =?utf-8?b?44CC?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000739
740""")
Guido van Rossum9604e662007-08-30 03:46:43 +0000741 eq(h.encode(maxlinelen=76), """\
742=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
743 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
744 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
745 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
746 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
747 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
748 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
749 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
750 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
751 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
752 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000753
754 def test_long_header_encode(self):
755 eq = self.ndiffAssertEqual
756 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
757 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
758 header_name='X-Foobar-Spoink-Defrobnit')
759 eq(h.encode(), '''\
760wasnipoop; giraffes="very-long-necked-animals";
761 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
762
763 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
764 eq = self.ndiffAssertEqual
765 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
766 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
767 header_name='X-Foobar-Spoink-Defrobnit',
768 continuation_ws='\t')
769 eq(h.encode(), '''\
770wasnipoop; giraffes="very-long-necked-animals";
771 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
772
773 def test_long_header_encode_with_tab_continuation(self):
774 eq = self.ndiffAssertEqual
775 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
776 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
777 header_name='X-Foobar-Spoink-Defrobnit',
778 continuation_ws='\t')
779 eq(h.encode(), '''\
780wasnipoop; giraffes="very-long-necked-animals";
781\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
782
R David Murray3a6152f2011-03-14 21:13:03 -0400783 def test_header_encode_with_different_output_charset(self):
784 h = Header('文', 'euc-jp')
785 self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=")
786
787 def test_long_header_encode_with_different_output_charset(self):
788 h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4'
789 b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4'
790 b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4'
791 b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp')
792 res = """\
793=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?=
794 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?="""
795 self.assertEqual(h.encode(), res)
796
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000797 def test_header_splitter(self):
798 eq = self.ndiffAssertEqual
799 msg = MIMEText('')
800 # It'd be great if we could use add_header() here, but that doesn't
801 # guarantee an order of the parameters.
802 msg['X-Foobar-Spoink-Defrobnit'] = (
803 'wasnipoop; giraffes="very-long-necked-animals"; '
804 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
805 sfp = StringIO()
806 g = Generator(sfp)
807 g.flatten(msg)
808 eq(sfp.getvalue(), '''\
809Content-Type: text/plain; charset="us-ascii"
810MIME-Version: 1.0
811Content-Transfer-Encoding: 7bit
812X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
813 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
814
815''')
816
def test_no_semis_header_splitter(self):
    # A long header with no semicolons at all: ten message ids separated by
    # single spaces, expected to fold between the fifth and sixth id.
    msg = Message()
    msg['From'] = 'test@dom.ain'
    message_ids = ['<%d@dom.ain>' % number for number in range(10)]
    msg['References'] = SPACE.join(message_ids)
    msg.set_payload('Test')
    out = StringIO()
    Generator(out).flatten(msg)
    expected = (
        'From: test@dom.ain\n'
        'References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>\n'
        ' <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>\n'
        '\n'
        'Test')
    self.ndiffAssertEqual(out.getvalue(), expected)
832
R David Murray7da4db12011-04-07 20:37:17 -0400833 def test_last_split_chunk_does_not_fit(self):
834 eq = self.ndiffAssertEqual
835 h = Header('Subject: the first part of this is short, but_the_second'
836 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
837 '_all_by_itself')
838 eq(h.encode(), """\
839Subject: the first part of this is short,
840 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
841
842 def test_splittable_leading_char_followed_by_overlong_unsplitable(self):
843 eq = self.ndiffAssertEqual
844 h = Header(', but_the_second'
845 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
846 '_all_by_itself')
847 eq(h.encode(), """\
848,
849 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
850
851 def test_multiple_splittable_leading_char_followed_by_overlong_unsplitable(self):
852 eq = self.ndiffAssertEqual
853 h = Header(', , but_the_second'
854 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
855 '_all_by_itself')
856 eq(h.encode(), """\
857, ,
858 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
859
860 def test_trailing_splitable_on_overlong_unsplitable(self):
861 eq = self.ndiffAssertEqual
862 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
863 'be_on_a_line_all_by_itself;')
864 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_should_"
865 "be_on_a_line_all_by_itself;")
866
867 def test_trailing_splitable_on_overlong_unsplitable_with_leading_splitable(self):
868 eq = self.ndiffAssertEqual
869 h = Header('; '
870 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
871 'be_on_a_line_all_by_itself;')
872 eq(h.encode(), """\
873;
874 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""")
875
R David Murraye1292a22011-04-07 20:54:03 -0400876 def test_long_header_with_multiple_sequential_split_chars(self):
877 # Issue 11492
878
879 eq = self.ndiffAssertEqual
880 h = Header('This is a long line that has two whitespaces in a row. '
881 'This used to cause truncation of the header when folded')
882 eq(h.encode(), """\
883This is a long line that has two whitespaces in a row. This used to cause
884 truncation of the header when folded""")
885
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000886 def test_no_split_long_header(self):
887 eq = self.ndiffAssertEqual
888 hstr = 'References: ' + 'x' * 80
Guido van Rossum9604e662007-08-30 03:46:43 +0000889 h = Header(hstr)
890 # These come on two lines because Headers are really field value
891 # classes and don't really know about their field names.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000892 eq(h.encode(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000893References:
894 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
895 h = Header('x' * 80)
896 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000897
898 def test_splitting_multiple_long_lines(self):
899 eq = self.ndiffAssertEqual
900 hstr = """\
901from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
902\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
903\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
904"""
905 h = Header(hstr, continuation_ws='\t')
906 eq(h.encode(), """\
907from babylon.socal-raves.org (localhost [127.0.0.1]);
908 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
909 for <mailman-admin@babylon.socal-raves.org>;
910 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
911\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
912 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
913 for <mailman-admin@babylon.socal-raves.org>;
914 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
915\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
916 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
917 for <mailman-admin@babylon.socal-raves.org>;
918 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")
919
920 def test_splitting_first_line_only_is_long(self):
921 eq = self.ndiffAssertEqual
922 hstr = """\
923from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
924\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
925\tid 17k4h5-00034i-00
926\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
927 h = Header(hstr, maxlinelen=78, header_name='Received',
928 continuation_ws='\t')
929 eq(h.encode(), """\
930from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
931 helo=cthulhu.gerg.ca)
932\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
933\tid 17k4h5-00034i-00
934\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")
935
936 def test_long_8bit_header(self):
937 eq = self.ndiffAssertEqual
938 msg = Message()
939 h = Header('Britische Regierung gibt', 'iso-8859-1',
940 header_name='Subject')
941 h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
Guido van Rossum9604e662007-08-30 03:46:43 +0000942 eq(h.encode(maxlinelen=76), """\
943=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
944 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000945 msg['Subject'] = h
Guido van Rossum9604e662007-08-30 03:46:43 +0000946 eq(msg.as_string(maxheaderlen=76), """\
947Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
948 =?iso-8859-1?q?hore-Windkraftprojekte?=
949
950""")
951 eq(msg.as_string(maxheaderlen=0), """\
952Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000953
954""")
955
def test_long_8bit_header_no_charset(self):
    header_string = ('Britische Regierung gibt gr\xfcnes Licht '
                     'f\xfcr Offshore-Windkraftprojekte '
                     '<a-very-long-address@example.com>')
    # Assigned as a plain string (no charset given), serialising the
    # message is expected to raise UnicodeEncodeError.
    bare = Message()
    bare['Reply-To'] = header_string
    self.assertRaises(UnicodeEncodeError, bare.as_string)
    # Wrapped in a Header with an explicit utf-8 charset, the same text is
    # expected to serialise as two encoded words.
    msg = Message()
    msg['Reply-To'] = Header(header_string, 'utf-8',
                             header_name='Reply-To')
    expected = (
        'Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=\n'
        ' =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=\n'
        '\n')
    self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), expected)
972
973 def test_long_to_header(self):
974 eq = self.ndiffAssertEqual
975 to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
976 '<someone@eecs.umich.edu>,'
977 '"Someone Test #B" <someone@umich.edu>, '
978 '"Someone Test #C" <someone@eecs.umich.edu>, '
979 '"Someone Test #D" <someone@eecs.umich.edu>')
980 msg = Message()
981 msg['To'] = to
982 eq(msg.as_string(maxheaderlen=78), '''\
Guido van Rossum9604e662007-08-30 03:46:43 +0000983To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
Barry Warsaw70d61ce2009-03-30 23:12:30 +0000984 "Someone Test #B" <someone@umich.edu>,
Guido van Rossum9604e662007-08-30 03:46:43 +0000985 "Someone Test #C" <someone@eecs.umich.edu>,
986 "Someone Test #D" <someone@eecs.umich.edu>
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000987
988''')
989
990 def test_long_line_after_append(self):
991 eq = self.ndiffAssertEqual
992 s = 'This is an example of string which has almost the limit of header length.'
993 h = Header(s)
994 h.append('Add another line.')
Guido van Rossum9604e662007-08-30 03:46:43 +0000995 eq(h.encode(maxlinelen=76), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000996This is an example of string which has almost the limit of header length.
997 Add another line.""")
998
999 def test_shorter_line_with_append(self):
1000 eq = self.ndiffAssertEqual
1001 s = 'This is a shorter line.'
1002 h = Header(s)
1003 h.append('Add another sentence. (Surprise?)')
1004 eq(h.encode(),
1005 'This is a shorter line. Add another sentence. (Surprise?)')
1006
1007 def test_long_field_name(self):
1008 eq = self.ndiffAssertEqual
1009 fn = 'X-Very-Very-Very-Long-Header-Name'
Guido van Rossum9604e662007-08-30 03:46:43 +00001010 gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
1011 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
1012 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
1013 'bef\xf6rdert. ')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001014 h = Header(gs, 'iso-8859-1', header_name=fn)
1015 # BAW: this seems broken because the first line is too long
Guido van Rossum9604e662007-08-30 03:46:43 +00001016 eq(h.encode(maxlinelen=76), """\
1017=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
1018 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
1019 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
1020 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001021
def test_long_received_header(self):
    received = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
                'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
                'Wed, 05 Mar 2003 18:10:18 -0700')
    msg = Message()
    # The same value goes in twice: once as a Header instance asking for
    # tab continuation, once as a plain string.
    msg['Received-1'] = Header(received, continuation_ws='\t')
    msg['Received-2'] = received
    # This should be splitting on spaces not semicolons.
    folded = (
        'from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
        'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;\n'
        ' Wed, 05 Mar 2003 18:10:18 -0700\n')
    expected = 'Received-1: ' + folded + 'Received-2: ' + folded + '\n'
    self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), expected)
1037
def test_string_headerinst_eq(self):
    value = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
             'tu-muenchen.de> (David Bremner\'s message of '
             '"Thu, 6 Mar 2003 13:58:21 +0100")')
    msg = Message()
    # A Header instance and the equivalent plain string are expected to
    # serialise identically.
    msg['Received-1'] = Header(value, header_name='Received-1',
                               continuation_ws='\t')
    msg['Received-2'] = value
    # XXX This should be splitting on spaces not commas.
    folded = (
        "<15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> "
        "(David Bremner's message of \"Thu,\n"
        " 6 Mar 2003 13:58:21 +0100\")\n")
    expected = 'Received-1: ' + folded + 'Received-2: ' + folded + '\n'
    self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), expected)
1054
1055 def test_long_unbreakable_lines_with_continuation(self):
1056 eq = self.ndiffAssertEqual
1057 msg = Message()
1058 t = """\
1059iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1060 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
1061 msg['Face-1'] = t
1062 msg['Face-2'] = Header(t, header_name='Face-2')
Barry Warsawbef9d212007-08-31 10:55:37 +00001063 # XXX This splitting is all wrong. It the first value line should be
1064 # snug against the field name.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001065 eq(msg.as_string(maxheaderlen=78), """\
Barry Warsawc5a6a302007-08-31 11:19:21 +00001066Face-1:\x20
Barry Warsaw70d61ce2009-03-30 23:12:30 +00001067 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001068 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Barry Warsawc5a6a302007-08-31 11:19:21 +00001069Face-2:\x20
Barry Warsawbef9d212007-08-31 10:55:37 +00001070 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001071 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
1072
1073""")
1074
def test_another_long_multiline_header(self):
    # Parse a one-header message from text, then serialise it again with a
    # 78-character limit.
    source = ('Received: from siimage.com '
              '([172.25.1.3]) by zima.siliconimage.com with '
              'Microsoft SMTPSVC(5.0.2195.4905); '
              'Wed, 16 Oct 2002 07:41:11 -0700')
    msg = email.message_from_string(source)
    expected = (
        'Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com '
        'with Microsoft SMTPSVC(5.0.2195.4905);\n'
        ' Wed, 16 Oct 2002 07:41:11 -0700\n'
        '\n')
    self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), expected)
1087
def test_long_lines_with_different_header(self):
    # The value deliberately starts with what looks like another field
    # name; it is stored under 'List' twice, as a string and as a Header.
    value = ('List-Unsubscribe: '
             '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
             ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
             '?subject=unsubscribe>')
    msg = Message()
    msg['List'] = value
    msg['List'] = Header(value, header_name='List')
    folded = (
        'List: List-Unsubscribe: '
        '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,\n'
        ' <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>\n')
    expected = folded + folded + '\n'
    self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), expected)
1104
def test_long_rfc2047_header_with_embedded_fws(self):
    # The value spans three physical lines and is given a utf-8 charset, so
    # it is emitted as RFC 2047 encoded words.  The second physical line
    # starts with a tab (the '\t' escape below).
    # NOTE(review): relative indentation inside both dedent() literals is
    # significant.  The third input line is assumed to start with extra
    # spaces (embedded folding white space) and every expected continuation
    # line with one extra space -- confirm against the canonical file.
    h = Header(textwrap.dedent("""\
        We're going to pretend this header is in a non-ascii character set
        \tto see if line wrapping with encoded words and embedded
           folding white space works"""),
               charset='utf-8',
               header_name='Test')
    # A newline is appended to both sides before comparing.
    self.assertEqual(h.encode()+'\n', textwrap.dedent("""\
        =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
         =?utf-8?q?cter_set?=
         =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
         =?utf-8?q?_folding_white_space_works?=""")+'\n')
1117
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001118
Ezio Melottib3aedd42010-11-20 19:04:17 +00001119
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001120# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    # The fixture body starts with "From "; the two tests check that the
    # line gains a ">" prefix only when the generator is asked to mangle.

    def setUp(self):
        message = Message()
        message['From'] = 'aaa@bbb.org'
        message.set_payload('From the desk of A.A.A.:\n'
                            'Blah blah blah\n')
        self.msg = message

    def _flattened(self, mangle):
        # Serialise the fixture with the given mangle_from_ setting.
        out = StringIO()
        Generator(out, mangle_from_=mangle).flatten(self.msg)
        return out.getvalue()

    def test_mangled_from(self):
        expected = ('From: aaa@bbb.org\n'
                    '\n'
                    '>From the desk of A.A.A.:\n'
                    'Blah blah blah\n')
        self.assertEqual(self._flattened(True), expected)

    def test_dont_mangle_from(self):
        expected = ('From: aaa@bbb.org\n'
                    '\n'
                    'From the desk of A.A.A.:\n'
                    'Blah blah blah\n')
        self.assertEqual(self._flattened(False), expected)
1151
1152
Ezio Melottib3aedd42010-11-20 19:04:17 +00001153
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001154# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    # Exercises MIMEAudio built from the raw bytes of audiotest.au.

    def setUp(self):
        # Make sure we pick up the audiotest.au that lives in email/test/data.
        # In Python, there's an audiotest.au living in Lib/test but that isn't
        # included in some binary distros that don't include the test
        # package.  The trailing empty string on the .join() is significant
        # since findfile() will do a dirname().
        datadir = os.path.join(os.path.dirname(landmark), 'data', '')
        with open(findfile('audiotest.au', datadir), 'rb') as fp:
            self._audiodata = fp.read()
        self._au = MIMEAudio(self._audiodata)

    def test_guess_minor_type(self):
        # No subtype is passed in setUp, so 'basic' has to be guessed from
        # the data.
        self.assertEqual(self._au.get_content_type(), 'audio/basic')

    def test_encoding(self):
        # The payload is expected to be the base64 text of the original
        # bytes.
        payload = self._au.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._audiodata)

    def test_checkSetMinor(self):
        # An explicit subtype is used as given.
        au = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(au.get_content_type(), 'audio/fish')

    def test_add_header(self):
        eq = self.assertEqual
        self._au.add_header('Content-Disposition', 'attachment',
                            filename='audiotest.au')
        eq(self._au['content-disposition'],
           'attachment; filename="audiotest.au"')
        eq(self._au.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'audiotest.au')])
        eq(self._au.get_param('filename', header='content-disposition'),
           'audiotest.au')
        # A fresh list serves as a sentinel that cannot be confused with any
        # real parameter value; assertIs reports both objects on failure,
        # which assertTrue(x is y) would not.
        missing = []
        eq(self._au.get_param('attachment', header='content-disposition'), '')
        self.assertIs(self._au.get_param('foo', failobj=missing,
                                         header='content-disposition'),
                      missing)
        # Try some missing stuff
        self.assertIs(self._au.get_param('foobar', missing), missing)
        self.assertIs(self._au.get_param('attachment', missing,
                                         header='foobar'),
                      missing)
1198
1199
Ezio Melottib3aedd42010-11-20 19:04:17 +00001200
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001201# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    # Exercises MIMEImage built from the raw bytes of PyBanner048.gif.

    def setUp(self):
        with openfile('PyBanner048.gif', 'rb') as fp:
            self._imgdata = fp.read()
        self._im = MIMEImage(self._imgdata)

    def test_guess_minor_type(self):
        # No subtype is passed in setUp, so 'gif' has to be guessed from the
        # data.
        self.assertEqual(self._im.get_content_type(), 'image/gif')

    def test_encoding(self):
        # The payload is expected to be the base64 text of the original
        # bytes.
        payload = self._im.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._imgdata)

    def test_checkSetMinor(self):
        # An explicit subtype is used as given.
        im = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(im.get_content_type(), 'image/fish')

    def test_add_header(self):
        eq = self.assertEqual
        self._im.add_header('Content-Disposition', 'attachment',
                            filename='dingusfish.gif')
        eq(self._im['content-disposition'],
           'attachment; filename="dingusfish.gif"')
        eq(self._im.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'dingusfish.gif')])
        eq(self._im.get_param('filename', header='content-disposition'),
           'dingusfish.gif')
        # A fresh list serves as a sentinel that cannot be confused with any
        # real parameter value; assertIs reports both objects on failure,
        # which assertTrue(x is y) would not.
        missing = []
        eq(self._im.get_param('attachment', header='content-disposition'), '')
        self.assertIs(self._im.get_param('foo', failobj=missing,
                                         header='content-disposition'),
                      missing)
        # Try some missing stuff
        self.assertIs(self._im.get_param('foobar', missing), missing)
        self.assertIs(self._im.get_param('attachment', missing,
                                         header='foobar'),
                      missing)
1239
1240
Ezio Melottib3aedd42010-11-20 19:04:17 +00001241
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001242# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    # Both tests wrap the same six non-ASCII bytes in a MIMEApplication.

    def test_headers(self):
        msg = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        self.assertEqual(msg.get_content_type(), 'application/octet-stream')
        self.assertEqual(msg['content-transfer-encoding'], 'base64')

    def test_body(self):
        raw = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(raw)
        # whitespace in the cte encoded block is RFC-irrelevant.
        encoded = msg.get_payload().strip()
        self.assertEqual(encoded, '+vv8/f7/')
        # Decoding the payload must give back the original bytes.
        self.assertEqual(msg.get_payload(decode=True), raw)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001257
1258
Ezio Melottib3aedd42010-11-20 19:04:17 +00001259
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001260# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    # Exercises MIMEText: content type, charset handling and payload.

    def setUp(self):
        self._msg = MIMEText('hello there')

    def test_types(self):
        eq = self.assertEqual
        eq(self._msg.get_content_type(), 'text/plain')
        eq(self._msg.get_param('charset'), 'us-ascii')
        # A fresh list serves as a sentinel that cannot be confused with any
        # real parameter value; assertIs reports both objects on failure.
        missing = []
        self.assertIs(self._msg.get_param('foobar', missing), missing)
        self.assertIs(
            self._msg.get_param('charset', missing, header='foobar'),
            missing)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there')
        # The charset object itself is compared with the plain string here.
        eq(msg.get_charset(), 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # assertIn shows the serialised message if the text is absent.
        self.assertIn('hello there', msg.as_string())

    def test_utf8_input(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        eq = self.assertEqual
        msg = MIMEText(teststr, _charset='utf-8')
        eq(msg.get_charset().output_charset, 'utf-8')
        eq(msg['content-type'], 'text/plain; charset="utf-8"')
        # The decoded payload is expected as the utf-8 bytes of the input.
        eq(msg.get_payload(decode=True), teststr.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
            "will fix in email6")
    def test_utf8_input_no_charset(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
1311
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001312
Ezio Melottib3aedd42010-11-20 19:04:17 +00001313
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001314# Test complicated multipart/* messages
1315class TestMultipart(TestEmailBase):
def setUp(self):
    # Build the fixture used by the tests: a multipart/mixed container that
    # holds a short text introduction followed by a GIF attachment, plus
    # From/To/Subject/Date headers.
    with openfile('PyBanner048.gif', 'rb') as fp:
        data = fp.read()
    container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
    image = MIMEImage(data, name='dingusfish.gif')
    image.add_header('content-disposition', 'attachment',
                     filename='dingusfish.gif')
    intro = MIMEText('Hi there,\n'
                     '\n'
                     'This is the dingus fish.\n')
    container.attach(intro)
    container.attach(image)
    container['From'] = 'Barry <barry@digicool.com>'
    container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
    container['Subject'] = 'Here is your dingus fish'

    now = 987809702.54848599
    timetuple = time.localtime(now)
    # The last field is tm_isdst: 0 selects the standard-time offset,
    # anything else the alternate (DST) one.
    if timetuple[-1] == 0:
        tzsecs = time.timezone
    else:
        tzsecs = time.altzone
    # time.timezone/altzone count seconds *west* of UTC, so a positive
    # value is written with a minus sign.
    sign = '-' if tzsecs > 0 else '+'
    # Format the magnitude as HHMM.  Dividing the signed value by 36 put a
    # second sign in the output east of UTC (e.g. ' +-100') and gave wrong
    # digits for offsets that are not whole hours.
    hours, minutes = divmod(abs(tzsecs) // 60, 60)
    tzoffset = ' %s%02d%02d' % (sign, hours, minutes)
    container['Date'] = time.strftime(
        '%a, %d %b %Y %H:%M:%S', timetuple) + tzoffset
    self._msg = container
    self._im = image
    self._txt = intro
1351
1352 def test_hierarchy(self):
1353 # convenience
1354 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001355 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001356 raises = self.assertRaises
1357 # tests
1358 m = self._msg
1359 unless(m.is_multipart())
1360 eq(m.get_content_type(), 'multipart/mixed')
1361 eq(len(m.get_payload()), 2)
1362 raises(IndexError, m.get_payload, 2)
1363 m0 = m.get_payload(0)
1364 m1 = m.get_payload(1)
1365 unless(m0 is self._txt)
1366 unless(m1 is self._im)
1367 eq(m.get_payload(), [m0, m1])
1368 unless(not m0.is_multipart())
1369 unless(not m1.is_multipart())
1370
1371 def test_empty_multipart_idempotent(self):
1372 text = """\
1373Content-Type: multipart/mixed; boundary="BOUNDARY"
1374MIME-Version: 1.0
1375Subject: A subject
1376To: aperson@dom.ain
1377From: bperson@dom.ain
1378
1379
1380--BOUNDARY
1381
1382
1383--BOUNDARY--
1384"""
1385 msg = Parser().parsestr(text)
1386 self.ndiffAssertEqual(text, msg.as_string())
1387
def test_no_parts_in_a_multipart_with_none_epilogue(self):
    # A multipart container with no parts attached and neither preamble nor
    # epilogue set.
    outer = MIMEBase('multipart', 'mixed')
    for field, text in (('Subject', 'A subject'),
                        ('To', 'aperson@dom.ain'),
                        ('From', 'bperson@dom.ain')):
        outer[field] = text
    outer.set_boundary('BOUNDARY')
    # Expected: no text after the closing boundary, not even a newline.
    expected = (
        'Content-Type: multipart/mixed; boundary="BOUNDARY"\n'
        'MIME-Version: 1.0\n'
        'Subject: A subject\n'
        'To: aperson@dom.ain\n'
        'From: bperson@dom.ain\n'
        '\n'
        '--BOUNDARY\n'
        '\n'
        '--BOUNDARY--')
    self.ndiffAssertEqual(outer.as_string(), expected)
1404
1405 def test_no_parts_in_a_multipart_with_empty_epilogue(self):
1406 outer = MIMEBase('multipart', 'mixed')
1407 outer['Subject'] = 'A subject'
1408 outer['To'] = 'aperson@dom.ain'
1409 outer['From'] = 'bperson@dom.ain'
1410 outer.preamble = ''
1411 outer.epilogue = ''
1412 outer.set_boundary('BOUNDARY')
1413 self.ndiffAssertEqual(outer.as_string(), '''\
1414Content-Type: multipart/mixed; boundary="BOUNDARY"
1415MIME-Version: 1.0
1416Subject: A subject
1417To: aperson@dom.ain
1418From: bperson@dom.ain
1419
1420
1421--BOUNDARY
1422
1423--BOUNDARY--
1424''')
1425
def test_one_part_in_a_multipart(self):
    # A single text part inside a multipart container; the boundary is set
    # before the part is attached.
    outer = MIMEBase('multipart', 'mixed')
    for field, text in (('Subject', 'A subject'),
                        ('To', 'aperson@dom.ain'),
                        ('From', 'bperson@dom.ain')):
        outer[field] = text
    outer.set_boundary('BOUNDARY')
    outer.attach(MIMEText('hello world'))
    expected = (
        'Content-Type: multipart/mixed; boundary="BOUNDARY"\n'
        'MIME-Version: 1.0\n'
        'Subject: A subject\n'
        'To: aperson@dom.ain\n'
        'From: bperson@dom.ain\n'
        '\n'
        '--BOUNDARY\n'
        'Content-Type: text/plain; charset="us-ascii"\n'
        'MIME-Version: 1.0\n'
        'Content-Transfer-Encoding: 7bit\n'
        '\n'
        'hello world\n'
        '--BOUNDARY--')
    self.ndiffAssertEqual(outer.as_string(), expected)
1449
def test_seq_parts_in_a_multipart_with_empty_preamble(self):
    # One text part, with the preamble set to the empty string.
    outer = MIMEBase('multipart', 'mixed')
    for field, text in (('Subject', 'A subject'),
                        ('To', 'aperson@dom.ain'),
                        ('From', 'bperson@dom.ain')):
        outer[field] = text
    outer.preamble = ''
    outer.attach(MIMEText('hello world'))
    outer.set_boundary('BOUNDARY')
    # Expected: an extra blank line between the headers' terminating blank
    # line and the first boundary.
    expected = (
        'Content-Type: multipart/mixed; boundary="BOUNDARY"\n'
        'MIME-Version: 1.0\n'
        'Subject: A subject\n'
        'To: aperson@dom.ain\n'
        'From: bperson@dom.ain\n'
        '\n'
        '\n'
        '--BOUNDARY\n'
        'Content-Type: text/plain; charset="us-ascii"\n'
        'MIME-Version: 1.0\n'
        'Content-Transfer-Encoding: 7bit\n'
        '\n'
        'hello world\n'
        '--BOUNDARY--')
    self.ndiffAssertEqual(outer.as_string(), expected)
1475
1476
def test_seq_parts_in_a_multipart_with_none_preamble(self):
    # One text part, with the preamble explicitly set to None.
    outer = MIMEBase('multipart', 'mixed')
    for field, text in (('Subject', 'A subject'),
                        ('To', 'aperson@dom.ain'),
                        ('From', 'bperson@dom.ain')):
        outer[field] = text
    outer.preamble = None
    outer.attach(MIMEText('hello world'))
    outer.set_boundary('BOUNDARY')
    # Expected: the first boundary follows the headers' blank line directly.
    expected = (
        'Content-Type: multipart/mixed; boundary="BOUNDARY"\n'
        'MIME-Version: 1.0\n'
        'Subject: A subject\n'
        'To: aperson@dom.ain\n'
        'From: bperson@dom.ain\n'
        '\n'
        '--BOUNDARY\n'
        'Content-Type: text/plain; charset="us-ascii"\n'
        'MIME-Version: 1.0\n'
        'Content-Transfer-Encoding: 7bit\n'
        '\n'
        'hello world\n'
        '--BOUNDARY--')
    self.ndiffAssertEqual(outer.as_string(), expected)
1501
1502
def test_seq_parts_in_a_multipart_with_none_epilogue(self):
    # One text part, with the epilogue explicitly set to None.
    outer = MIMEBase('multipart', 'mixed')
    for field, text in (('Subject', 'A subject'),
                        ('To', 'aperson@dom.ain'),
                        ('From', 'bperson@dom.ain')):
        outer[field] = text
    outer.epilogue = None
    outer.attach(MIMEText('hello world'))
    outer.set_boundary('BOUNDARY')
    # Expected: nothing at all after the closing boundary.
    expected = (
        'Content-Type: multipart/mixed; boundary="BOUNDARY"\n'
        'MIME-Version: 1.0\n'
        'Subject: A subject\n'
        'To: aperson@dom.ain\n'
        'From: bperson@dom.ain\n'
        '\n'
        '--BOUNDARY\n'
        'Content-Type: text/plain; charset="us-ascii"\n'
        'MIME-Version: 1.0\n'
        'Content-Transfer-Encoding: 7bit\n'
        '\n'
        'hello world\n'
        '--BOUNDARY--')
    self.ndiffAssertEqual(outer.as_string(), expected)
1527
1528
1529 def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
1530 eq = self.ndiffAssertEqual
1531 outer = MIMEBase('multipart', 'mixed')
1532 outer['Subject'] = 'A subject'
1533 outer['To'] = 'aperson@dom.ain'
1534 outer['From'] = 'bperson@dom.ain'
1535 outer.epilogue = ''
1536 msg = MIMEText('hello world')
1537 outer.attach(msg)
1538 outer.set_boundary('BOUNDARY')
1539 eq(outer.as_string(), '''\
1540Content-Type: multipart/mixed; boundary="BOUNDARY"
1541MIME-Version: 1.0
1542Subject: A subject
1543To: aperson@dom.ain
1544From: bperson@dom.ain
1545
1546--BOUNDARY
1547Content-Type: text/plain; charset="us-ascii"
1548MIME-Version: 1.0
1549Content-Transfer-Encoding: 7bit
1550
1551hello world
1552--BOUNDARY--
1553''')
1554
1555
1556 def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
1557 eq = self.ndiffAssertEqual
1558 outer = MIMEBase('multipart', 'mixed')
1559 outer['Subject'] = 'A subject'
1560 outer['To'] = 'aperson@dom.ain'
1561 outer['From'] = 'bperson@dom.ain'
1562 outer.epilogue = '\n'
1563 msg = MIMEText('hello world')
1564 outer.attach(msg)
1565 outer.set_boundary('BOUNDARY')
1566 eq(outer.as_string(), '''\
1567Content-Type: multipart/mixed; boundary="BOUNDARY"
1568MIME-Version: 1.0
1569Subject: A subject
1570To: aperson@dom.ain
1571From: bperson@dom.ain
1572
1573--BOUNDARY
1574Content-Type: text/plain; charset="us-ascii"
1575MIME-Version: 1.0
1576Content-Transfer-Encoding: 7bit
1577
1578hello world
1579--BOUNDARY--
1580
1581''')
1582
1583 def test_message_external_body(self):
1584 eq = self.assertEqual
1585 msg = self._msgobj('msg_36.txt')
1586 eq(len(msg.get_payload()), 2)
1587 msg1 = msg.get_payload(1)
1588 eq(msg1.get_content_type(), 'multipart/alternative')
1589 eq(len(msg1.get_payload()), 2)
1590 for subpart in msg1.get_payload():
1591 eq(subpart.get_content_type(), 'message/external-body')
1592 eq(len(subpart.get_payload()), 1)
1593 subsubpart = subpart.get_payload(0)
1594 eq(subsubpart.get_content_type(), 'text/plain')
1595
1596 def test_double_boundary(self):
1597 # msg_37.txt is a multipart that contains two dash-boundary's in a
1598 # row. Our interpretation of RFC 2046 calls for ignoring the second
1599 # and subsequent boundaries.
1600 msg = self._msgobj('msg_37.txt')
1601 self.assertEqual(len(msg.get_payload()), 3)
1602
1603 def test_nested_inner_contains_outer_boundary(self):
1604 eq = self.ndiffAssertEqual
1605 # msg_38.txt has an inner part that contains outer boundaries. My
1606 # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
1607 # these are illegal and should be interpreted as unterminated inner
1608 # parts.
1609 msg = self._msgobj('msg_38.txt')
1610 sfp = StringIO()
1611 iterators._structure(msg, sfp)
1612 eq(sfp.getvalue(), """\
1613multipart/mixed
1614 multipart/mixed
1615 multipart/alternative
1616 text/plain
1617 text/plain
1618 text/plain
1619 text/plain
1620""")
1621
1622 def test_nested_with_same_boundary(self):
1623 eq = self.ndiffAssertEqual
1624 # msg 39.txt is similarly evil in that it's got inner parts that use
1625 # the same boundary as outer parts. Again, I believe the way this is
1626 # parsed is closest to the spirit of RFC 2046
1627 msg = self._msgobj('msg_39.txt')
1628 sfp = StringIO()
1629 iterators._structure(msg, sfp)
1630 eq(sfp.getvalue(), """\
1631multipart/mixed
1632 multipart/mixed
1633 multipart/alternative
1634 application/octet-stream
1635 application/octet-stream
1636 text/plain
1637""")
1638
1639 def test_boundary_in_non_multipart(self):
1640 msg = self._msgobj('msg_40.txt')
1641 self.assertEqual(msg.as_string(), '''\
1642MIME-Version: 1.0
1643Content-Type: text/html; boundary="--961284236552522269"
1644
1645----961284236552522269
1646Content-Type: text/html;
1647Content-Transfer-Encoding: 7Bit
1648
1649<html></html>
1650
1651----961284236552522269--
1652''')
1653
1654 def test_boundary_with_leading_space(self):
1655 eq = self.assertEqual
1656 msg = email.message_from_string('''\
1657MIME-Version: 1.0
1658Content-Type: multipart/mixed; boundary=" XXXX"
1659
1660-- XXXX
1661Content-Type: text/plain
1662
1663
1664-- XXXX
1665Content-Type: text/plain
1666
1667-- XXXX--
1668''')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001669 self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001670 eq(msg.get_boundary(), ' XXXX')
1671 eq(len(msg.get_payload()), 2)
1672
1673 def test_boundary_without_trailing_newline(self):
1674 m = Parser().parsestr("""\
1675Content-Type: multipart/mixed; boundary="===============0012394164=="
1676MIME-Version: 1.0
1677
1678--===============0012394164==
1679Content-Type: image/file1.jpg
1680MIME-Version: 1.0
1681Content-Transfer-Encoding: base64
1682
1683YXNkZg==
1684--===============0012394164==--""")
Ezio Melottib3aedd42010-11-20 19:04:17 +00001685 self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001686
1687
Ezio Melottib3aedd42010-11-20 19:04:17 +00001688
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001689# Test some badly formatted messages
class TestNonConformant(TestEmailBase):
    """Tests for badly formatted messages.

    Type checks use assertIsInstance() so that a failure reports the
    offending object instead of a bare "False is not true".
    """

    def test_parse_missing_minor_type(self):
        # msg_14.txt must be reported as text/plain.
        eq = self.assertEqual
        msg = self._msgobj('msg_14.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')

    def test_same_boundary_inner_outer(self):
        msg = self._msgobj('msg_15.txt')
        # XXX We can probably eventually do better
        inner = msg.get_payload(0)
        self.assertTrue(hasattr(inner, 'defects'))
        self.assertEqual(len(inner.defects), 1)
        self.assertIsInstance(inner.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    def test_multipart_no_boundary(self):
        # The payload stays a plain string and both defects are recorded, in
        # this order.
        msg = self._msgobj('msg_25.txt')
        self.assertIsInstance(msg.get_payload(), str)
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    def test_invalid_content_type(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        msg = Message()
        # RFC 2045, $5.2 says invalid yields text/plain
        msg['Content-Type'] = 'text'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Clear the old value and try something /really/ invalid
        del msg['content-type']
        msg['Content-Type'] = 'foo'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Still, make sure that the message is idempotently generated
        s = StringIO()
        g = Generator(s)
        g.flatten(msg)
        neq(s.getvalue(), 'Content-Type: foo\n\n')

    def test_no_start_boundary(self):
        # The whole body of msg_31.txt, boundary lines included, must come
        # back as one string payload.
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_31.txt')
        eq(msg.get_payload(), """\
--BOUNDARY
Content-Type: text/plain

message 1

--BOUNDARY
Content-Type: text/plain

message 2

--BOUNDARY--
""")

    def test_no_separating_blank_line(self):
        # Regenerating msg_35.txt must produce the header/body separator
        # shown in the expected text.
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_35.txt')
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: here's something interesting

counter to RFC 2822, there's no separating newline here
""")

    def test_lying_multipart(self):
        msg = self._msgobj('msg_41.txt')
        self.assertTrue(hasattr(msg, 'defects'))
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    def test_missing_start_boundary(self):
        outer = self._msgobj('msg_42.txt')
        # The message structure is:
        #
        # multipart/mixed
        #    text/plain
        #    message/rfc822
        #        multipart/mixed [*]
        #
        # [*] This message is missing its start boundary
        bad = outer.get_payload(1).get_payload(0)
        self.assertEqual(len(bad.defects), 1)
        self.assertIsInstance(bad.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    def test_first_line_is_continuation_header(self):
        # The leading continuation line is neither a header nor part of the
        # body; it is only reported through the defect's ``line`` attribute.
        eq = self.assertEqual
        m = ' Line 1\nLine 2\nLine 3'
        msg = email.message_from_string(m)
        eq(msg.keys(), [])
        eq(msg.get_payload(), 'Line 2\nLine 3')
        eq(len(msg.defects), 1)
        self.assertIsInstance(msg.defects[0],
                              errors.FirstHeaderLineIsContinuationDefect)
        eq(msg.defects[0].line, ' Line 1\n')
1800
1801
Ezio Melottib3aedd42010-11-20 19:04:17 +00001802
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001803# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    """Tests for RFC 2047 header encoding and decoding."""

    def test_rfc2047_multiline(self):
        # Decode a folded header that mixes plain text with encoded words,
        # then rebuild it and re-encode it with a 76 column limit.
        eq = self.assertEqual
        s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
        dh = decode_header(s)
        eq(dh, [
            (b'Re:', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            (b'baz foo bar', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
        header = make_header(dh)
        eq(str(header),
           'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""")

    def test_whitespace_eater_unicode(self):
        eq = self.assertEqual
        s = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        dh = decode_header(s)
        # The charset name comes back lower-cased.
        eq(dh, [(b'Andr\xe9', 'iso-8859-1'),
                (b'Pirard <pirard@dom.ain>', None)])
        header = str(make_header(dh))
        eq(header, 'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_eater_unicode_2(self):
        eq = self.assertEqual
        s = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?='
        dh = decode_header(s)
        eq(dh, [(b'The', None), (b'quick brown fox', 'iso-8859-1'),
                (b'jumped over the', None), (b'lazy dog', 'iso-8859-1')])
        hu = str(make_header(dh))
        eq(hu, 'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        # Encoded words that are not delimited by whitespace are expected to
        # be left undecoded: the input comes back as a single chunk.
        s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        dh = decode_header(s)
        self.assertEqual(dh, [(s, None)])

    def test_rfc2047_with_whitespace(self):
        s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        dh = decode_header(s)
        self.assertEqual(dh, [(b'Sm', None), (b'\xf6', 'iso-8859-1'),
                              (b'rg', None), (b'\xe5', 'iso-8859-1'),
                              (b'sbord', None)])

    def test_rfc2047_B_bad_padding(self):
        # Each pair is (base64 text with missing or short padding, expected
        # decoded bytes).
        s = '=?iso-8859-1?B?%s?='
        data = [                                # only test complete bytes
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi')
          ]
        for q, a in data:
            dh = decode_header(s % q)
            self.assertEqual(dh, [(a, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004.
        # '=zz' is not a valid hex escape and must be passed through as is.
        # The charset used to be misspelled 'iso-8659-1'; the name is not
        # interpreted by decode_header(), so the real one is used here.
        s = '=?iso-8859-1?Q?andr=e9=zz?='
        self.assertEqual(decode_header(s),
                        [(b'andr\xe9=zz', 'iso-8859-1')])
1867
Ezio Melottib3aedd42010-11-20 19:04:17 +00001868
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001869# Test the MIMEMessage class
class TestMIMEMessage(TestEmailBase):
    """Tests for the MIMEMessage class and message/* containers."""

    def setUp(self):
        # Keep the raw text of msg_11.txt available as self._text.
        with openfile('msg_11.txt') as fp:
            self._text = fp.read()

    def test_type_error(self):
        # Only Message instances may be wrapped.
        self.assertRaises(TypeError, MIMEMessage, 'a plain string')

    def test_valid_argument(self):
        eq = self.assertEqual
        subject = 'A sub-message'
        m = Message()
        m['Subject'] = subject
        r = MIMEMessage(m)
        eq(r.get_content_type(), 'message/rfc822')
        payload = r.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subpart = payload[0]
        # The wrapped message is stored as is, not copied.
        self.assertIs(subpart, m)
        eq(subpart['subject'], subject)

    def test_bad_multipart(self):
        # A message/rfc822 container already holding its one message must
        # refuse a second attach().
        msg1 = Message()
        msg1['Subject'] = 'subpart 1'
        msg2 = Message()
        msg2['Subject'] = 'subpart 2'
        r = MIMEMessage(msg1)
        self.assertRaises(errors.MultipartConversionError, r.attach, msg2)

    def test_generate(self):
        # First craft the message to be encapsulated
        m = Message()
        m['Subject'] = 'An enclosed message'
        m.set_payload('Here is the body of the message.\n')
        r = MIMEMessage(m)
        r['Subject'] = 'The enclosing message'
        s = StringIO()
        g = Generator(s)
        g.flatten(r)
        self.assertEqual(s.getvalue(), """\
Content-Type: message/rfc822
MIME-Version: 1.0
Subject: The enclosing message

Subject: An enclosed message

Here is the body of the message.
""")

    def test_parse_message_rfc822(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_11.txt')
        eq(msg.get_content_type(), 'message/rfc822')
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        submsg = payload[0]
        self.assertIsInstance(submsg, Message)
        eq(submsg['subject'], 'An enclosed message')
        eq(submsg.get_payload(), 'Here is the body of the message.\n')

    def test_dsn(self):
        eq = self.assertEqual
        # msg 16 is a Delivery Status Notification, see RFC 1894
        msg = self._msgobj('msg_16.txt')
        eq(msg.get_content_type(), 'multipart/report')
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 3)
        # Subpart 1 is a text/plain, human readable section
        subpart = msg.get_payload(0)
        eq(subpart.get_content_type(), 'text/plain')
        # NOTE(review): the indented lines of the expected text are written
        # with two leading spaces, presumably matching msg_16.txt -- confirm
        # against the data file.
        eq(subpart.get_payload(), """\
This report relates to a message you sent with the following header fields:

  Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
  Date: Sun, 23 Sep 2001 20:10:55 -0700
  From: "Ian T. Henry" <henryi@oxy.edu>
  To: SoCal Raves <scr@socal-raves.org>
  Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

  Recipient address: jangel1@cougar.noc.ucla.edu
  Reason: recipient reached disk quota

""")
        # Subpart 2 contains the machine parsable DSN information.  It
        # consists of two blocks of headers, represented by two nested Message
        # objects.
        subpart = msg.get_payload(1)
        eq(subpart.get_content_type(), 'message/delivery-status')
        eq(len(subpart.get_payload()), 2)
        # message/delivery-status should treat each block as a bunch of
        # headers, i.e. a bunch of Message objects.
        dsn1 = subpart.get_payload(0)
        self.assertIsInstance(dsn1, Message)
        eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
        eq(dsn1.get_param('dns', header='reporting-mta'), '')
        # Try a missing one <wink>
        self.assertIsNone(dsn1.get_param('nsd', header='reporting-mta'))
        dsn2 = subpart.get_payload(1)
        self.assertIsInstance(dsn2, Message)
        eq(dsn2['action'], 'failed')
        eq(dsn2.get_params(header='original-recipient'),
           [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
        eq(dsn2.get_param('rfc822', header='final-recipient'), '')
        # Subpart 3 is the original message
        subpart = msg.get_payload(2)
        eq(subpart.get_content_type(), 'message/rfc822')
        payload = subpart.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subsubpart = payload[0]
        self.assertIsInstance(subsubpart, Message)
        eq(subsubpart.get_content_type(), 'text/plain')
        eq(subsubpart['message-id'],
           '<002001c144a6$8752e060$56104586@oxy.edu>')

    def test_epilogue(self):
        # A hand-built multipart with preamble and epilogue must flatten to
        # exactly the text stored in msg_21.txt.
        eq = self.ndiffAssertEqual
        with openfile('msg_21.txt') as fp:
            text = fp.read()
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = 'End of MIME message\n'
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), text)

    def test_no_nl_preamble(self):
        # The preamble has no trailing newline; the expected text still puts
        # the first boundary on a line of its own.
        eq = self.ndiffAssertEqual
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = ''
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
""")

    def test_default_type(self):
        # Both containers of msg_30.txt default to message/rfc822 and the
        # messages they hold default to text/plain.
        eq = self.assertEqual
        with openfile('msg_30.txt') as fp:
            msg = email.message_from_file(fp)
        container1 = msg.get_payload(0)
        eq(container1.get_default_type(), 'message/rfc822')
        eq(container1.get_content_type(), 'message/rfc822')
        container2 = msg.get_payload(1)
        eq(container2.get_default_type(), 'message/rfc822')
        eq(container2.get_content_type(), 'message/rfc822')
        container1a = container1.get_payload(0)
        eq(container1a.get_default_type(), 'text/plain')
        eq(container1a.get_content_type(), 'text/plain')
        container2a = container2.get_payload(0)
        eq(container2a.get_default_type(), 'text/plain')
        eq(container2a.get_content_type(), 'text/plain')

    def test_default_type_with_explicit_container_type(self):
        # Same expectations as test_default_type, checked on msg_28.txt.
        eq = self.assertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        container1 = msg.get_payload(0)
        eq(container1.get_default_type(), 'message/rfc822')
        eq(container1.get_content_type(), 'message/rfc822')
        container2 = msg.get_payload(1)
        eq(container2.get_default_type(), 'message/rfc822')
        eq(container2.get_content_type(), 'message/rfc822')
        container1a = container1.get_payload(0)
        eq(container1a.get_default_type(), 'text/plain')
        eq(container1a.get_content_type(), 'text/plain')
        container2a = container2.get_payload(0)
        eq(container2a.get_default_type(), 'text/plain')
        eq(container2a.get_content_type(), 'text/plain')

    def test_default_type_non_parsed(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # Set up container
        container = MIMEMultipart('digest', 'BOUNDARY')
        container.epilogue = ''
        # Set up subparts
        subpart1a = MIMEText('message 1\n')
        subpart2a = MIMEText('message 2\n')
        subpart1 = MIMEMessage(subpart1a)
        subpart2 = MIMEMessage(subpart2a)
        container.attach(subpart1)
        container.attach(subpart2)
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
        # Without their explicit headers the subparts must still report
        # message/rfc822, and the flattened parts lose those header lines.
        del subpart1['content-type']
        del subpart1['mime-version']
        del subpart2['content-type']
        del subpart2['mime-version']
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')

    def test_mime_attachments_in_constructor(self):
        # Parts handed to the constructor become the payload, in order.
        eq = self.assertEqual
        text1 = MIMEText('')
        text2 = MIMEText('')
        msg = MIMEMultipart(_subparts=(text1, text2))
        eq(len(msg.get_payload()), 2)
        eq(msg.get_payload(0), text1)
        eq(msg.get_payload(1), text2)

    def test_default_multipart_constructor(self):
        msg = MIMEMultipart()
        self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002168
Ezio Melottib3aedd42010-11-20 19:04:17 +00002169
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002170# A general test of parser->model->generator idempotency. IOW, read a message
2171# in, parse it into a message object tree, then without touching the tree,
2172# regenerate the plain text. The original text and the transformed text
2173# should be identical. Note: that we ignore the Unix-From since that may
2174# contain a changed date.
class TestIdempotent(TestEmailBase):
    """Parse a data file, regenerate it untouched, and compare with the input."""

    # Line separator expected at the end of the parsed preamble, epilogue and
    # payloads checked below.
    linesep = '\n'

    def _msgobj(self, filename):
        """Return ``(msg, text)`` for the named data file.

        *text* is the content of the file and *msg* is the result of parsing
        that text with email.message_from_string().
        """
        with openfile(filename) as fp:
            data = fp.read()
        msg = email.message_from_string(data)
        return msg, data

    def _idempotent(self, msg, text, unixfrom=False):
        """Assert that flattening *msg* gives back exactly *text*.

        Headers are not re-wrapped (maxheaderlen=0).  *unixfrom* is passed
        through to Generator.flatten().
        """
        s = StringIO()
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg, unixfrom=unixfrom)
        self.ndiffAssertEqual(text, s.getvalue())

    def test_parse_text_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_01.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_params()[1], ('charset', 'us-ascii'))
        eq(msg.get_param('charset'), 'us-ascii')
        self.assertIsNone(msg.preamble)
        self.assertIsNone(msg.epilogue)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_03.txt')
        eq(msg.get_content_type(), 'text/plain')
        self.assertIsNone(msg.get_params())
        self.assertIsNone(msg.get_param('charset'))
        self._idempotent(msg, text)

    def test_simple_multipart(self):
        msg, text = self._msgobj('msg_04.txt')
        self._idempotent(msg, text)

    def test_MIME_digest(self):
        msg, text = self._msgobj('msg_02.txt')
        self._idempotent(msg, text)

    def test_long_header(self):
        msg, text = self._msgobj('msg_27.txt')
        self._idempotent(msg, text)

    def test_MIME_digest_with_part_headers(self):
        msg, text = self._msgobj('msg_28.txt')
        self._idempotent(msg, text)

    def test_mixed_with_image(self):
        msg, text = self._msgobj('msg_06.txt')
        self._idempotent(msg, text)

    def test_multipart_report(self):
        msg, text = self._msgobj('msg_05.txt')
        self._idempotent(msg, text)

    def test_dsn(self):
        msg, text = self._msgobj('msg_16.txt')
        self._idempotent(msg, text)

    def test_preamble_epilogue(self):
        msg, text = self._msgobj('msg_21.txt')
        self._idempotent(msg, text)

    def test_multipart_one_part(self):
        msg, text = self._msgobj('msg_23.txt')
        self._idempotent(msg, text)

    def test_multipart_no_parts(self):
        msg, text = self._msgobj('msg_24.txt')
        self._idempotent(msg, text)

    def test_no_start_boundary(self):
        msg, text = self._msgobj('msg_31.txt')
        self._idempotent(msg, text)

    def test_rfc2231_charset(self):
        msg, text = self._msgobj('msg_32.txt')
        self._idempotent(msg, text)

    def test_more_rfc2231_parameters(self):
        msg, text = self._msgobj('msg_33.txt')
        self._idempotent(msg, text)

    def test_text_plain_in_a_multipart_digest(self):
        msg, text = self._msgobj('msg_34.txt')
        self._idempotent(msg, text)

    def test_nested_multipart_mixeds(self):
        msg, text = self._msgobj('msg_12a.txt')
        self._idempotent(msg, text)

    def test_message_external_body_idempotent(self):
        msg, text = self._msgobj('msg_36.txt')
        self._idempotent(msg, text)

    def test_message_delivery_status(self):
        # This is the one case that regenerates the Unix-From line as well.
        msg, text = self._msgobj('msg_43.txt')
        self._idempotent(msg, text, unixfrom=True)

    def test_message_signed_idempotent(self):
        msg, text = self._msgobj('msg_45.txt')
        self._idempotent(msg, text)

    def test_content_type(self):
        eq = self.assertEqual
        msg, _ = self._msgobj('msg_05.txt')
        eq(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(msg.get_params())
        eq(params['report-type'], 'delivery-status')
        eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep)
        eq(msg.epilogue, self.linesep)
        eq(len(msg.get_payload()), 3)
        # Make sure the subparts are what we expect
        msg1 = msg.get_payload(0)
        eq(msg1.get_content_type(), 'text/plain')
        eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg2 = msg.get_payload(1)
        eq(msg2.get_content_type(), 'text/plain')
        eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg3 = msg.get_payload(2)
        eq(msg3.get_content_type(), 'message/rfc822')
        self.assertIsInstance(msg3, Message)
        payload = msg3.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg4 = payload[0]
        self.assertIsInstance(msg4, Message)
        eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep)

    def test_parser(self):
        eq = self.assertEqual
        msg, _ = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        eq(msg.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and that
        # that submessage has a type of text/plain
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg1 = payload[0]
        self.assertIsInstance(msg1, Message)
        eq(msg1.get_content_type(), 'text/plain')
        self.assertIsInstance(msg1.get_payload(), str)
        eq(msg1.get_payload(), self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002332
2333
Ezio Melottib3aedd42010-11-20 19:04:17 +00002334
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002335# Test various other bits of the package's functionality
2336class TestMiscellaneous(TestEmailBase):
2337 def test_message_from_string(self):
2338 with openfile('msg_01.txt') as fp:
2339 text = fp.read()
2340 msg = email.message_from_string(text)
2341 s = StringIO()
2342 # Don't wrap/continue long headers since we're trying to test
2343 # idempotency.
2344 g = Generator(s, maxheaderlen=0)
2345 g.flatten(msg)
2346 self.assertEqual(text, s.getvalue())
2347
2348 def test_message_from_file(self):
2349 with openfile('msg_01.txt') as fp:
2350 text = fp.read()
2351 fp.seek(0)
2352 msg = email.message_from_file(fp)
2353 s = StringIO()
2354 # Don't wrap/continue long headers since we're trying to test
2355 # idempotency.
2356 g = Generator(s, maxheaderlen=0)
2357 g.flatten(msg)
2358 self.assertEqual(text, s.getvalue())
2359
2360 def test_message_from_string_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002361 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002362 with openfile('msg_01.txt') as fp:
2363 text = fp.read()
2364
2365 # Create a subclass
2366 class MyMessage(Message):
2367 pass
2368
2369 msg = email.message_from_string(text, MyMessage)
2370 unless(isinstance(msg, MyMessage))
2371 # Try something more complicated
2372 with openfile('msg_02.txt') as fp:
2373 text = fp.read()
2374 msg = email.message_from_string(text, MyMessage)
2375 for subpart in msg.walk():
2376 unless(isinstance(subpart, MyMessage))
2377
2378 def test_message_from_file_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002379 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002380 # Create a subclass
2381 class MyMessage(Message):
2382 pass
2383
2384 with openfile('msg_01.txt') as fp:
2385 msg = email.message_from_file(fp, MyMessage)
2386 unless(isinstance(msg, MyMessage))
2387 # Try something more complicated
2388 with openfile('msg_02.txt') as fp:
2389 msg = email.message_from_file(fp, MyMessage)
2390 for subpart in msg.walk():
2391 unless(isinstance(subpart, MyMessage))
2392
2393 def test__all__(self):
2394 module = __import__('email')
2395 # Can't use sorted() here due to Python 2.3 compatibility
2396 all = module.__all__[:]
2397 all.sort()
2398 self.assertEqual(all, [
2399 'base64mime', 'charset', 'encoders', 'errors', 'generator',
R. David Murray96fd54e2010-10-08 15:55:28 +00002400 'header', 'iterators', 'message', 'message_from_binary_file',
2401 'message_from_bytes', 'message_from_file',
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002402 'message_from_string', 'mime', 'parser',
2403 'quoprimime', 'utils',
2404 ])
2405
2406 def test_formatdate(self):
2407 now = time.time()
2408 self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
2409 time.gmtime(now)[:6])
2410
2411 def test_formatdate_localtime(self):
2412 now = time.time()
2413 self.assertEqual(
2414 utils.parsedate(utils.formatdate(now, localtime=True))[:6],
2415 time.localtime(now)[:6])
2416
2417 def test_formatdate_usegmt(self):
2418 now = time.time()
2419 self.assertEqual(
2420 utils.formatdate(now, localtime=False),
2421 time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
2422 self.assertEqual(
2423 utils.formatdate(now, localtime=False, usegmt=True),
2424 time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
2425
2426 def test_parsedate_none(self):
2427 self.assertEqual(utils.parsedate(''), None)
2428
2429 def test_parsedate_compact(self):
2430 # The FWS after the comma is optional
2431 self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
2432 utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
2433
2434 def test_parsedate_no_dayofweek(self):
2435 eq = self.assertEqual
2436 eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
2437 (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
2438
2439 def test_parsedate_compact_no_dayofweek(self):
2440 eq = self.assertEqual
2441 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
2442 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2443
R. David Murray4a62e892010-12-23 20:35:46 +00002444 def test_parsedate_no_space_before_positive_offset(self):
2445 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
2446 (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
2447
2448 def test_parsedate_no_space_before_negative_offset(self):
2449 # Issue 1155362: we already handled '+' for this case.
2450 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
2451 (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))
2452
2453
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002454 def test_parsedate_acceptable_to_time_functions(self):
2455 eq = self.assertEqual
2456 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
2457 t = int(time.mktime(timetup))
2458 eq(time.localtime(t)[:6], timetup[:6])
2459 eq(int(time.strftime('%Y', timetup)), 2003)
2460 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
2461 t = int(time.mktime(timetup[:9]))
2462 eq(time.localtime(t)[:6], timetup[:6])
2463 eq(int(time.strftime('%Y', timetup[:9])), 2003)
2464
R. David Murray219d1c82010-08-25 00:45:55 +00002465 def test_parsedate_y2k(self):
2466 """Test for parsing a date with a two-digit year.
2467
2468 Parsing a date with a two-digit year should return the correct
2469 four-digit year. RFC822 allows two-digit years, but RFC2822 (which
2470 obsoletes RFC822) requires four-digit years.
2471
2472 """
2473 self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
2474 utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
2475 self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
2476 utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
2477
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002478 def test_parseaddr_empty(self):
2479 self.assertEqual(utils.parseaddr('<>'), ('', ''))
2480 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
2481
2482 def test_noquote_dump(self):
2483 self.assertEqual(
2484 utils.formataddr(('A Silly Person', 'person@dom.ain')),
2485 'A Silly Person <person@dom.ain>')
2486
2487 def test_escape_dump(self):
2488 self.assertEqual(
2489 utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
2490 r'"A \(Very\) Silly Person" <person@dom.ain>')
2491 a = r'A \(Special\) Person'
2492 b = 'person@dom.ain'
2493 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2494
2495 def test_escape_backslashes(self):
2496 self.assertEqual(
2497 utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
2498 r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
2499 a = r'Arthur \Backslash\ Foobar'
2500 b = 'person@dom.ain'
2501 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2502
2503 def test_name_with_dot(self):
2504 x = 'John X. Doe <jxd@example.com>'
2505 y = '"John X. Doe" <jxd@example.com>'
2506 a, b = ('John X. Doe', 'jxd@example.com')
2507 self.assertEqual(utils.parseaddr(x), (a, b))
2508 self.assertEqual(utils.parseaddr(y), (a, b))
2509 # formataddr() quotes the name if there's a dot in it
2510 self.assertEqual(utils.formataddr((a, b)), y)
2511
R. David Murray5397e862010-10-02 15:58:26 +00002512 def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
2513 # issue 10005. Note that in the third test the second pair of
2514 # backslashes is not actually a quoted pair because it is not inside a
2515 # comment or quoted string: the address being parsed has a quoted
2516 # string containing a quoted backslash, followed by 'example' and two
2517 # backslashes, followed by another quoted string containing a space and
2518 # the word 'example'. parseaddr copies those two backslashes
2519 # literally. Per rfc5322 this is not technically correct since a \ may
2520 # not appear in an address outside of a quoted string. It is probably
2521 # a sensible Postel interpretation, though.
2522 eq = self.assertEqual
2523 eq(utils.parseaddr('""example" example"@example.com'),
2524 ('', '""example" example"@example.com'))
2525 eq(utils.parseaddr('"\\"example\\" example"@example.com'),
2526 ('', '"\\"example\\" example"@example.com'))
2527 eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
2528 ('', '"\\\\"example\\\\" example"@example.com'))
2529
R. David Murray63563cd2010-12-18 18:25:38 +00002530 def test_parseaddr_preserves_spaces_in_local_part(self):
2531 # issue 9286. A normal RFC5322 local part should not contain any
2532 # folding white space, but legacy local parts can (they are a sequence
2533 # of atoms, not dotatoms). On the other hand we strip whitespace from
2534 # before the @ and around dots, on the assumption that the whitespace
2535 # around the punctuation is a mistake in what would otherwise be
2536 # an RFC5322 local part. Leading whitespace is, usual, stripped as well.
2537 self.assertEqual(('', "merwok wok@xample.com"),
2538 utils.parseaddr("merwok wok@xample.com"))
2539 self.assertEqual(('', "merwok wok@xample.com"),
2540 utils.parseaddr("merwok wok@xample.com"))
2541 self.assertEqual(('', "merwok wok@xample.com"),
2542 utils.parseaddr(" merwok wok @xample.com"))
2543 self.assertEqual(('', 'merwok"wok" wok@xample.com'),
2544 utils.parseaddr('merwok"wok" wok@xample.com'))
2545 self.assertEqual(('', 'merwok.wok.wok@xample.com'),
2546 utils.parseaddr('merwok. wok . wok@xample.com'))
2547
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002548 def test_multiline_from_comment(self):
2549 x = """\
2550Foo
2551\tBar <foo@example.com>"""
2552 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
2553
2554 def test_quote_dump(self):
2555 self.assertEqual(
2556 utils.formataddr(('A Silly; Person', 'person@dom.ain')),
2557 r'"A Silly; Person" <person@dom.ain>')
2558
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002559 def test_charset_richcomparisons(self):
2560 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002561 ne = self.assertNotEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002562 cset1 = Charset()
2563 cset2 = Charset()
2564 eq(cset1, 'us-ascii')
2565 eq(cset1, 'US-ASCII')
2566 eq(cset1, 'Us-AsCiI')
2567 eq('us-ascii', cset1)
2568 eq('US-ASCII', cset1)
2569 eq('Us-AsCiI', cset1)
2570 ne(cset1, 'usascii')
2571 ne(cset1, 'USASCII')
2572 ne(cset1, 'UsAsCiI')
2573 ne('usascii', cset1)
2574 ne('USASCII', cset1)
2575 ne('UsAsCiI', cset1)
2576 eq(cset1, cset2)
2577 eq(cset2, cset1)
2578
2579 def test_getaddresses(self):
2580 eq = self.assertEqual
2581 eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
2582 'Bud Person <bperson@dom.ain>']),
2583 [('Al Person', 'aperson@dom.ain'),
2584 ('Bud Person', 'bperson@dom.ain')])
2585
2586 def test_getaddresses_nasty(self):
2587 eq = self.assertEqual
2588 eq(utils.getaddresses(['foo: ;']), [('', '')])
2589 eq(utils.getaddresses(
2590 ['[]*-- =~$']),
2591 [('', ''), ('', ''), ('', '*--')])
2592 eq(utils.getaddresses(
2593 ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
2594 [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
2595
2596 def test_getaddresses_embedded_comment(self):
2597 """Test proper handling of a nested comment"""
2598 eq = self.assertEqual
2599 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
2600 eq(addrs[0][1], 'foo@bar.com')
2601
2602 def test_utils_quote_unquote(self):
2603 eq = self.assertEqual
2604 msg = Message()
2605 msg.add_header('content-disposition', 'attachment',
2606 filename='foo\\wacky"name')
2607 eq(msg.get_filename(), 'foo\\wacky"name')
2608
2609 def test_get_body_encoding_with_bogus_charset(self):
2610 charset = Charset('not a charset')
2611 self.assertEqual(charset.get_body_encoding(), 'base64')
2612
2613 def test_get_body_encoding_with_uppercase_charset(self):
2614 eq = self.assertEqual
2615 msg = Message()
2616 msg['Content-Type'] = 'text/plain; charset=UTF-8'
2617 eq(msg['content-type'], 'text/plain; charset=UTF-8')
2618 charsets = msg.get_charsets()
2619 eq(len(charsets), 1)
2620 eq(charsets[0], 'utf-8')
2621 charset = Charset(charsets[0])
2622 eq(charset.get_body_encoding(), 'base64')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00002623 msg.set_payload(b'hello world', charset=charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002624 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
2625 eq(msg.get_payload(decode=True), b'hello world')
2626 eq(msg['content-transfer-encoding'], 'base64')
2627 # Try another one
2628 msg = Message()
2629 msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
2630 charsets = msg.get_charsets()
2631 eq(len(charsets), 1)
2632 eq(charsets[0], 'us-ascii')
2633 charset = Charset(charsets[0])
2634 eq(charset.get_body_encoding(), encoders.encode_7or8bit)
2635 msg.set_payload('hello world', charset=charset)
2636 eq(msg.get_payload(), 'hello world')
2637 eq(msg['content-transfer-encoding'], '7bit')
2638
2639 def test_charsets_case_insensitive(self):
2640 lc = Charset('us-ascii')
2641 uc = Charset('US-ASCII')
2642 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
2643
2644 def test_partial_falls_inside_message_delivery_status(self):
2645 eq = self.ndiffAssertEqual
2646 # The Parser interface provides chunks of data to FeedParser in 8192
2647 # byte gulps. SF bug #1076485 found one of those chunks inside
2648 # message/delivery-status header block, which triggered an
2649 # unreadline() of NeedMoreData.
2650 msg = self._msgobj('msg_43.txt')
2651 sfp = StringIO()
2652 iterators._structure(msg, sfp)
2653 eq(sfp.getvalue(), """\
2654multipart/report
2655 text/plain
2656 message/delivery-status
2657 text/plain
2658 text/plain
2659 text/plain
2660 text/plain
2661 text/plain
2662 text/plain
2663 text/plain
2664 text/plain
2665 text/plain
2666 text/plain
2667 text/plain
2668 text/plain
2669 text/plain
2670 text/plain
2671 text/plain
2672 text/plain
2673 text/plain
2674 text/plain
2675 text/plain
2676 text/plain
2677 text/plain
2678 text/plain
2679 text/plain
2680 text/plain
2681 text/plain
2682 text/plain
2683 text/rfc822-headers
2684""")
2685
R. David Murraya0b44b52010-12-02 21:47:19 +00002686 def test_make_msgid_domain(self):
2687 self.assertEqual(
2688 email.utils.make_msgid(domain='testdomain-string')[-19:],
2689 '@testdomain-string>')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002690
Ezio Melottib3aedd42010-11-20 19:04:17 +00002691
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002692# Test the iterator/generators
class TestIterators(TestEmailBase):
    # Exercises email.iterators (body_line_iterator, typed_subpart_iterator)
    # and the line buffering done by feedparser.BufferedSubFile.

    def test_body_line_iterator(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # First a simple non-multipart message
        msg = self._msgobj('msg_01.txt')
        it = iterators.body_line_iterator(msg)
        lines = list(it)
        eq(len(lines), 6)
        neq(EMPTYSTRING.join(lines), msg.get_payload())
        # Now a more complicated multipart
        msg = self._msgobj('msg_02.txt')
        it = iterators.body_line_iterator(msg)
        lines = list(it)
        eq(len(lines), 43)
        # msg_19.txt holds the expected concatenation of all body lines.
        with openfile('msg_19.txt') as fp:
            neq(EMPTYSTRING.join(lines), fp.read())

    def test_typed_subpart_iterator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_04.txt')
        it = iterators.typed_subpart_iterator(msg, 'text')
        # Collect the payload of every subpart the iterator yields.
        lines = [subpart.get_payload() for subpart in it]
        eq(len(lines), 2)
        eq(EMPTYSTRING.join(lines), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_03.txt')
        it = iterators.typed_subpart_iterator(msg, 'text', 'plain')
        # Collect the payload of every subpart the iterator yields.
        lines = [subpart.get_payload() for subpart in it]
        eq(len(lines), 1)
        eq(EMPTYSTRING.join(lines), """\

Hi,

Do you like this message?

-Me
""")

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
        line endings. A CR ending one push() followed by a LF starting
        the next push() added an empty line.
        '''
        # Each entry is (chunk pushed, number of complete lines that become
        # readable immediately after that push).
        imt = [
            ("a\r \n", 2),
            ("b", 0),
            ("c\n", 1),
            ("", 0),
            ("d\r\n", 1),
            ("e\r", 0),
            ("\nf", 1),
            ("\r\n", 1),
            ]
        from email.feedparser import BufferedSubFile, NeedMoreData
        bsf = BufferedSubFile()
        om = []
        nt = 0
        for il, n in imt:
            bsf.push(il)
            nt += n
            n1 = 0
            # Drain every line that is available after this push.
            while True:
                ol = bsf.readline()
                # NeedMoreData is a sentinel object: compare by identity.
                if ol is NeedMoreData:
                    break
                om.append(ol)
                n1 += 1
            self.assertEqual(n, n1)
        self.assertEqual(len(om), nt)
        # Nothing may be lost or invented: output must equal the input.
        self.assertEqual(''.join([il for il, n in imt]), ''.join(om))
2779
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002780
Ezio Melottib3aedd42010-11-20 19:04:17 +00002781
class TestParsers(TestEmailBase):
    # Parser / HeaderParser behaviour on header folding, line-ending
    # handling, nested message/rfc822 parts and RFC 2822 header-name syntax.

    # Show full diffs when a multi-line comparison in this class fails.
    maxDiff = None

    def test_header_parser(self):
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt') as fp:
            msg = HeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        # The body was not parsed, so it stays a single string payload.
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)

    def test_whitespace_continuation(self):
        eq = self.assertEqual
        # This message contains a line after the Subject: header that has only
        # whitespace, but it is not empty!
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: the next line has a space on it
\x20
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        eq = self.assertEqual
        # Like the previous test, but the subject line is the last
        # header.
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam
Subject: the next line has a space on it
\x20

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        eq = self.assertEqual
        # newline='\n' keeps the CRLF line endings of the data file intact.
        with openfile('msg_26.txt', newline='\n') as fp:
            msg = Parser().parse(fp)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'text/plain')
        eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'application/riscos')

    def test_crlf_flatten(self):
        # Using newline='\n' preserves the crlfs in this input file.
        with openfile('msg_26.txt', newline='\n') as fp:
            text = fp.read()
        msg = email.message_from_string(text)
        s = StringIO()
        g = Generator(s)
        g.flatten(msg, linesep='\r\n')
        self.assertEqual(s.getvalue(), text)

    def test_multipart_digest_with_extra_mime_headers(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'message/rfc822')
        self.assertTrue(part1.is_multipart())
        eq(len(part1.get_payload()), 1)
        part1a = part1.get_payload(0)
        self.assertFalse(part1a.is_multipart())
        eq(part1a.get_content_type(), 'text/plain')
        neq(part1a.get_payload(), 'message 1\n')
        # next message/rfc822
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'message/rfc822')
        self.assertTrue(part2.is_multipart())
        eq(len(part2.get_payload()), 1)
        part2a = part2.get_payload(0)
        self.assertFalse(part2a.is_multipart())
        eq(part2a.get_content_type(), 'text/plain')
        neq(part2a.get_payload(), 'message 2\n')

    def test_three_lines(self):
        # A bug report by Andrew McNamara
        lines = ['From: Andrew Person <aperson@dom.ain',
                 'Subject: Test',
                 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
        msg = email.message_from_string(NL.join(lines))
        self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        eq = self.assertEqual
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        value1 = 'text'
        value2 = 'more text'
        m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            value1, value2)
        msg = email.message_from_string(m)
        eq(msg.get('Header'), value1)
        eq(msg.get('Next-Header'), value2)

    def test_rfc2822_header_syntax(self):
        eq = self.assertEqual
        m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg), 3)
        # Iterating a message yields its header field names.
        eq(sorted(msg), ['!"#QUX;~', '>From', 'From'])
        eq(msg.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        eq = self.assertEqual
        # The first line has a space before any colon, so it is not a header
        # and no headers at all are recorded.
        m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg.keys()), 0)

    def test_rfc2822_one_character_header(self):
        eq = self.assertEqual
        m = 'A: first header\nB: second header\nCC: third header\n\nbody'
        msg = email.message_from_string(m)
        # sorted() works whatever kind of iterable keys() returns.
        headers = sorted(msg.keys())
        eq(headers, ['A', 'B', 'CC'])
        eq(msg.get_payload(), 'body')

    def test_CRLFLF_at_end_of_part(self):
        # issue 5610: feedparser should not eat two chars from body part ending
        # with "\r\n\n".
        m = (
            "From: foo@bar.com\n"
            "To: baz\n"
            "Mime-Version: 1.0\n"
            "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
            "\n"
            "--BOUNDARY\n"
            "Content-Type: text/plain\n"
            "\n"
            "body ending with CRLF newline\r\n"
            "\n"
            "--BOUNDARY--\n"
            )
        msg = email.message_from_string(m)
        self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002944
Ezio Melottib3aedd42010-11-20 19:04:17 +00002945
R. David Murray96fd54e2010-10-08 15:55:28 +00002946class Test8BitBytesHandling(unittest.TestCase):
2947 # In Python3 all input is string, but that doesn't work if the actual input
2948 # uses an 8bit transfer encoding. To hack around that, in email 5.1 we
2949 # decode byte streams using the surrogateescape error handler, and
2950 # reconvert to binary at appropriate places if we detect surrogates. This
2951 # doesn't allow us to transform headers with 8bit bytes (they get munged),
2952 # but it does allow us to parse and preserve them, and to decode body
2953 # parts that use an 8bit CTE.
2954
2955 bodytest_msg = textwrap.dedent("""\
2956 From: foo@bar.com
2957 To: baz
2958 Mime-Version: 1.0
2959 Content-Type: text/plain; charset={charset}
2960 Content-Transfer-Encoding: {cte}
2961
2962 {bodyline}
2963 """)
2964
2965 def test_known_8bit_CTE(self):
2966 m = self.bodytest_msg.format(charset='utf-8',
2967 cte='8bit',
2968 bodyline='pöstal').encode('utf-8')
2969 msg = email.message_from_bytes(m)
2970 self.assertEqual(msg.get_payload(), "pöstal\n")
2971 self.assertEqual(msg.get_payload(decode=True),
2972 "pöstal\n".encode('utf-8'))
2973
2974 def test_unknown_8bit_CTE(self):
2975 m = self.bodytest_msg.format(charset='notavalidcharset',
2976 cte='8bit',
2977 bodyline='pöstal').encode('utf-8')
2978 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00002979 self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n")
R. David Murray96fd54e2010-10-08 15:55:28 +00002980 self.assertEqual(msg.get_payload(decode=True),
2981 "pöstal\n".encode('utf-8'))
2982
2983 def test_8bit_in_quopri_body(self):
2984 # This is non-RFC compliant data...without 'decode' the library code
2985 # decodes the body using the charset from the headers, and because the
2986 # source byte really is utf-8 this works. This is likely to fail
2987 # against real dirty data (ie: produce mojibake), but the data is
2988 # invalid anyway so it is as good a guess as any. But this means that
2989 # this test just confirms the current behavior; that behavior is not
2990 # necessarily the best possible behavior. With 'decode' it is
2991 # returning the raw bytes, so that test should be of correct behavior,
2992 # or at least produce the same result that email4 did.
2993 m = self.bodytest_msg.format(charset='utf-8',
2994 cte='quoted-printable',
2995 bodyline='p=C3=B6stál').encode('utf-8')
2996 msg = email.message_from_bytes(m)
2997 self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
2998 self.assertEqual(msg.get_payload(decode=True),
2999 'pöstál\n'.encode('utf-8'))
3000
3001 def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
3002 # This is similar to the previous test, but proves that if the 8bit
3003 # byte is undecodeable in the specified charset, it gets replaced
3004 # by the unicode 'unknown' character. Again, this may or may not
3005 # be the ideal behavior. Note that if decode=False none of the
3006 # decoders will get involved, so this is the only test we need
3007 # for this behavior.
3008 m = self.bodytest_msg.format(charset='ascii',
3009 cte='quoted-printable',
3010 bodyline='p=C3=B6stál').encode('utf-8')
3011 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00003012 self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
R. David Murray96fd54e2010-10-08 15:55:28 +00003013 self.assertEqual(msg.get_payload(decode=True),
3014 'pöstál\n'.encode('utf-8'))
3015
3016 def test_8bit_in_base64_body(self):
3017 # Sticking an 8bit byte in a base64 block makes it undecodable by
3018 # normal means, so the block is returned undecoded, but as bytes.
3019 m = self.bodytest_msg.format(charset='utf-8',
3020 cte='base64',
3021 bodyline='cMO2c3RhbAá=').encode('utf-8')
3022 msg = email.message_from_bytes(m)
3023 self.assertEqual(msg.get_payload(decode=True),
3024 'cMO2c3RhbAá=\n'.encode('utf-8'))
3025
3026 def test_8bit_in_uuencode_body(self):
3027 # Sticking an 8bit byte in a uuencode block makes it undecodable by
3028 # normal means, so the block is returned undecoded, but as bytes.
3029 m = self.bodytest_msg.format(charset='utf-8',
3030 cte='uuencode',
3031 bodyline='<,.V<W1A; á ').encode('utf-8')
3032 msg = email.message_from_bytes(m)
3033 self.assertEqual(msg.get_payload(decode=True),
3034 '<,.V<W1A; á \n'.encode('utf-8'))
3035
3036
R. David Murray92532142011-01-07 23:25:30 +00003037 headertest_headers = (
3038 ('From: foo@bar.com', ('From', 'foo@bar.com')),
3039 ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
3040 ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
3041 '\tJean de Baddie',
3042 ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
3043 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
3044 ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
3045 ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
3046 )
3047 headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) +
3048 '\nYes, they are flying.\n').encode('utf-8')
R. David Murray96fd54e2010-10-08 15:55:28 +00003049
3050 def test_get_8bit_header(self):
3051 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003052 self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz')
3053 self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz')
R. David Murray96fd54e2010-10-08 15:55:28 +00003054
3055 def test_print_8bit_headers(self):
3056 msg = email.message_from_bytes(self.headertest_msg)
3057 self.assertEqual(str(msg),
R. David Murray92532142011-01-07 23:25:30 +00003058 textwrap.dedent("""\
3059 From: {}
3060 To: {}
3061 Subject: {}
3062 From: {}
3063
3064 Yes, they are flying.
3065 """).format(*[expected[1] for (_, expected) in
3066 self.headertest_headers]))
R. David Murray96fd54e2010-10-08 15:55:28 +00003067
3068 def test_values_with_8bit_headers(self):
3069 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003070 self.assertListEqual([str(x) for x in msg.values()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003071 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003072 'b\uFFFD\uFFFDz',
3073 'Maintenant je vous pr\uFFFD\uFFFDsente mon '
3074 'coll\uFFFD\uFFFDgue, le pouf '
3075 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
R. David Murray96fd54e2010-10-08 15:55:28 +00003076 '\tJean de Baddie',
R. David Murray92532142011-01-07 23:25:30 +00003077 "g\uFFFD\uFFFDst"])
R. David Murray96fd54e2010-10-08 15:55:28 +00003078
3079 def test_items_with_8bit_headers(self):
3080 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003081 self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003082 [('From', 'foo@bar.com'),
R. David Murray92532142011-01-07 23:25:30 +00003083 ('To', 'b\uFFFD\uFFFDz'),
3084 ('Subject', 'Maintenant je vous '
3085 'pr\uFFFD\uFFFDsente '
3086 'mon coll\uFFFD\uFFFDgue, le pouf '
3087 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
3088 '\tJean de Baddie'),
3089 ('From', 'g\uFFFD\uFFFDst')])
R. David Murray96fd54e2010-10-08 15:55:28 +00003090
3091 def test_get_all_with_8bit_headers(self):
3092 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003093 self.assertListEqual([str(x) for x in msg.get_all('from')],
R. David Murray96fd54e2010-10-08 15:55:28 +00003094 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003095 'g\uFFFD\uFFFDst'])
R. David Murray96fd54e2010-10-08 15:55:28 +00003096
R David Murraya2150232011-03-16 21:11:23 -04003097 def test_get_content_type_with_8bit(self):
3098 msg = email.message_from_bytes(textwrap.dedent("""\
3099 Content-Type: text/pl\xA7in; charset=utf-8
3100 """).encode('latin-1'))
3101 self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin")
3102 self.assertEqual(msg.get_content_maintype(), "text")
3103 self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin")
3104
3105 def test_get_params_with_8bit(self):
3106 msg = email.message_from_bytes(
3107 'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1'))
3108 self.assertEqual(msg.get_params(header='x-header'),
3109 [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])
3110 self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne')
3111 # XXX: someday you might be able to get 'b\xa7r', for now you can't.
3112 self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None)
3113
3114 def test_get_rfc2231_params_with_8bit(self):
3115 msg = email.message_from_bytes(textwrap.dedent("""\
3116 Content-Type: text/plain; charset=us-ascii;
3117 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3118 ).encode('latin-1'))
3119 self.assertEqual(msg.get_param('title'),
3120 ('us-ascii', 'en', 'This is not f\uFFFDn'))
3121
3122 def test_set_rfc2231_params_with_8bit(self):
3123 msg = email.message_from_bytes(textwrap.dedent("""\
3124 Content-Type: text/plain; charset=us-ascii;
3125 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3126 ).encode('latin-1'))
3127 msg.set_param('title', 'test')
3128 self.assertEqual(msg.get_param('title'), 'test')
3129
3130 def test_del_rfc2231_params_with_8bit(self):
3131 msg = email.message_from_bytes(textwrap.dedent("""\
3132 Content-Type: text/plain; charset=us-ascii;
3133 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3134 ).encode('latin-1'))
3135 msg.del_param('title')
3136 self.assertEqual(msg.get_param('title'), None)
3137 self.assertEqual(msg.get_content_maintype(), 'text')
3138
3139 def test_get_payload_with_8bit_cte_header(self):
3140 msg = email.message_from_bytes(textwrap.dedent("""\
3141 Content-Transfer-Encoding: b\xa7se64
3142 Content-Type: text/plain; charset=latin-1
3143
3144 payload
3145 """).encode('latin-1'))
3146 self.assertEqual(msg.get_payload(), 'payload\n')
3147 self.assertEqual(msg.get_payload(decode=True), b'payload\n')
3148
R. David Murray96fd54e2010-10-08 15:55:28 +00003149 non_latin_bin_msg = textwrap.dedent("""\
3150 From: foo@bar.com
3151 To: báz
3152 Subject: Maintenant je vous présente mon collègue, le pouf célèbre
3153 \tJean de Baddie
3154 Mime-Version: 1.0
3155 Content-Type: text/plain; charset="utf-8"
3156 Content-Transfer-Encoding: 8bit
3157
3158 Да, они летят.
3159 """).encode('utf-8')
3160
3161 def test_bytes_generator(self):
3162 msg = email.message_from_bytes(self.non_latin_bin_msg)
3163 out = BytesIO()
3164 email.generator.BytesGenerator(out).flatten(msg)
3165 self.assertEqual(out.getvalue(), self.non_latin_bin_msg)
3166
R. David Murray7372a072011-01-26 21:21:32 +00003167 def test_bytes_generator_handles_None_body(self):
3168 #Issue 11019
3169 msg = email.message.Message()
3170 out = BytesIO()
3171 email.generator.BytesGenerator(out).flatten(msg)
3172 self.assertEqual(out.getvalue(), b"\n")
3173
# Text that test_generator_handles_8bit expects the string Generator to emit
# for non_latin_bin_msg: 8-bit headers as unknown-8bit encoded words (the
# Subject folded over three lines, continuations starting with a space) and
# the body as base64.
non_latin_bin_msg_as7bit_wrapped = (
    "From: foo@bar.com\n"
    "To: =?unknown-8bit?q?b=C3=A1z?=\n"
    "Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=\n"
    " =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n"
    " =?unknown-8bit?q?_Jean_de_Baddie?=\n"
    "Mime-Version: 1.0\n"
    'Content-Type: text/plain; charset="utf-8"\n'
    "Content-Transfer-Encoding: base64\n"
    "\n"
    "0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==\n"
)
3186
def test_generator_handles_8bit(self):
    # The str-based Generator cannot emit raw 8-bit data, so flattening the
    # binary message must produce the 7-bit-safe rendering instead.
    parsed = email.message_from_bytes(self.non_latin_bin_msg)
    sink = StringIO()
    email.generator.Generator(sink).flatten(parsed)
    rendered = sink.getvalue()
    self.assertEqual(rendered, self.non_latin_bin_msg_as7bit_wrapped)
R. David Murray96fd54e2010-10-08 15:55:28 +00003192
3193 def test_bytes_generator_with_unix_from(self):
3194 # The unixfrom contains a current date, so we can't check it
3195 # literally. Just make sure the first word is 'From' and the
3196 # rest of the message matches the input.
3197 msg = email.message_from_bytes(self.non_latin_bin_msg)
3198 out = BytesIO()
3199 email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
3200 lines = out.getvalue().split(b'\n')
3201 self.assertEqual(lines[0].split()[0], b'From')
3202 self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)
3203
# Variant of non_latin_bin_msg_as7bit_wrapped in which the first two
# physical lines of the Subject header (list items 2 and 3) are replaced by
# a single, unwrapped encoded word; every other line is kept as is.
non_latin_bin_msg_as7bit = '\n'.join(
    non_latin_bin_msg_as7bit_wrapped.split('\n')[:2]
    + ['Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
       'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
    + non_latin_bin_msg_as7bit_wrapped.split('\n')[4:])
3209
def test_message_from_binary_file(self):
    # Write the raw message to a scratch file, parse it back through
    # BytesParser from a binary file object, and compare its str() form
    # with the expected 7-bit rendering.
    path = 'test.msg'
    self.addCleanup(unlink, path)
    with open(path, 'wb') as sink:
        sink.write(self.non_latin_bin_msg)
    with open(path, 'rb') as source:
        parsed = email.parser.BytesParser().parse(source)
    self.assertEqual(str(parsed), self.non_latin_bin_msg_as7bit)
3218
# Latin-1 encoded message with ASCII-only headers and an 8-bit body.
latin_bin_msg = (
    "From: foo@bar.com\n"
    "To: Dinsdale\n"
    "Subject: Nudge nudge, wink, wink\n"
    "Mime-Version: 1.0\n"
    'Content-Type: text/plain; charset="latin-1"\n'
    "Content-Transfer-Encoding: 8bit\n"
    "\n"
    "oh là là, know what I mean, know what I mean?\n"
).encode('latin-1')
3229
# Expected str() rendering of latin_bin_msg: the charset is spelled
# iso-8859-1 and the body is quoted-printable.
latin_bin_msg_as7bit = (
    "From: foo@bar.com\n"
    "To: Dinsdale\n"
    "Subject: Nudge nudge, wink, wink\n"
    "Mime-Version: 1.0\n"
    'Content-Type: text/plain; charset="iso-8859-1"\n'
    "Content-Transfer-Encoding: quoted-printable\n"
    "\n"
    "oh l=E0 l=E0, know what I mean, know what I mean?\n"
)
3240
def test_string_generator_reencodes_to_quopri_when_appropriate(self):
    # str() of a message parsed from latin-1 bytes is expected to come out
    # with a quoted-printable body.
    rendered = str(email.message_from_bytes(self.latin_bin_msg))
    self.assertEqual(rendered, self.latin_bin_msg_as7bit)
3244
3245 def test_decoded_generator_emits_unicode_body(self):
3246 m = email.message_from_bytes(self.latin_bin_msg)
3247 out = StringIO()
3248 email.generator.DecodedGenerator(out).flatten(m)
3249 #DecodedHeader output contains an extra blank line compared
3250 #to the input message. RDM: not sure if this is a bug or not,
3251 #but it is not specific to the 8bit->7bit conversion.
3252 self.assertEqual(out.getvalue(),
3253 self.latin_bin_msg.decode('latin-1')+'\n')
3254
def test_bytes_feedparser(self):
    # Feed the raw message to BytesFeedParser in 10-byte slices and check
    # that the reassembled message renders as expected.
    data = self.latin_bin_msg
    parser = email.feedparser.BytesFeedParser()
    for start in range(0, len(data), 10):
        chunk = data[start:start + 10]
        parser.feed(chunk)
    parsed = parser.close()
    self.assertEqual(str(parsed), self.latin_bin_msg_as7bit)
3261
def test_crlf_flatten(self):
    # Flattening with linesep='\r\n' must reproduce the bytes read from
    # the msg_26.txt data file.
    with openfile('msg_26.txt', 'rb') as fp:
        original = fp.read()
    sink = BytesIO()
    generator = email.generator.BytesGenerator(sink)
    generator.flatten(email.message_from_bytes(original), linesep='\r\n')
    self.assertEqual(sink.getvalue(), original)
R David Murrayc5c14722011-04-06 08:13:02 -04003270
3271 def test_8bit_multipart(self):
3272 # Issue 11605
3273 source = textwrap.dedent("""\
3274 Date: Fri, 18 Mar 2011 17:15:43 +0100
3275 To: foo@example.com
3276 From: foodwatch-Newsletter <bar@example.com>
3277 Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System
3278 Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain>
3279 MIME-Version: 1.0
3280 Content-Type: multipart/alternative;
3281 boundary="b1_76a486bee62b0d200f33dc2ca08220ad"
3282
3283 --b1_76a486bee62b0d200f33dc2ca08220ad
3284 Content-Type: text/plain; charset="utf-8"
3285 Content-Transfer-Encoding: 8bit
3286
3287 Guten Tag, ,
3288
3289 mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die
3290 Nachrichten aus Japan.
3291
3292
3293 --b1_76a486bee62b0d200f33dc2ca08220ad
3294 Content-Type: text/html; charset="utf-8"
3295 Content-Transfer-Encoding: 8bit
3296
3297 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
3298 "http://www.w3.org/TR/html4/loose.dtd">
3299 <html lang="de">
3300 <head>
3301 <title>foodwatch - Newsletter</title>
3302 </head>
3303 <body>
3304 <p>mit gro&szlig;er Betroffenheit verfolgen auch wir im foodwatch-Team
3305 die Nachrichten aus Japan.</p>
3306 </body>
3307 </html>
3308 --b1_76a486bee62b0d200f33dc2ca08220ad--
3309
3310 """).encode('utf-8')
3311 msg = email.message_from_bytes(source)
3312 s = BytesIO()
3313 g = email.generator.BytesGenerator(s)
3314 g.flatten(msg)
3315 self.assertEqual(s.getvalue(), source)
3316
# Lift unittest's limit on the length of failure diffs for this class.
maxDiff = None
3318
Ezio Melottib3aedd42010-11-20 19:04:17 +00003319
class BaseTestBytesGeneratorIdempotent:
    # Mixin that overrides the _msgobj/_idempotent hooks so they work on
    # bytes.  Concrete subclasses provide linesep, blinesep and
    # normalize_linesep_regex.

    maxDiff = None

    def _msgobj(self, filename):
        # Read the data file as bytes, rewrite its line endings to the
        # subclass's convention and return (parsed message, those bytes).
        with openfile(filename, 'rb') as fp:
            raw = fp.read()
        normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(normalized), normalized

    def _idempotent(self, msg, data, unixfrom=False):
        # Flatten msg without header re-wrapping (maxheaderlen=0) and
        # compare the result against the bytes it was parsed from.
        sink = BytesIO()
        generator = email.generator.BytesGenerator(sink, maxheaderlen=0)
        generator.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertByteStringsEqual(data, sink.getvalue())

    def assertByteStringsEqual(self, str1, str2):
        # Not using self.blinesep here is intentional.  This way the output
        # is more useful when the failure results in mixed line endings.
        first_lines = str1.split(b'\n')
        second_lines = str2.split(b'\n')
        self.assertListEqual(first_lines, second_lines)
3341
3342
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                     TestIdempotent):
    # Bytes-generator idempotency tests using bare LF line endings: any
    # CRLF in the data files is rewritten to LF before parsing.
    normalize_linesep_regex = re.compile(br'\r\n')
    blinesep = b'\n'
    linesep = '\n'
3348
3349
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    # Bytes-generator idempotency tests using CRLF line endings: every LF
    # not already preceded by CR is rewritten to CRLF before parsing.
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
    blinesep = b'\r\n'
    linesep = '\r\n'
3355
Ezio Melottib3aedd42010-11-20 19:04:17 +00003356
class TestBase64(unittest.TestCase):
    # Tests for the helpers in email.base64mime.

    def test_len(self):
        eq = self.assertEqual
        eq(base64mime.header_length('hello'),
           len(base64mime.body_encode(b'hello', eol='')))
        # Base64 emits one 4-character group for every (possibly partial)
        # group of 3 input characters: 0 -> 0, 1..3 -> 4, 4..6 -> 8, ...
        for size in range(15):
            bsize = 4 * ((size + 2) // 3)
            eq(base64mime.header_length('x' * size), bsize)

    def test_decode(self):
        eq = self.assertEqual
        eq(base64mime.decode(''), b'')
        eq(base64mime.decode('aGVsbG8='), b'hello')

    def test_encode(self):
        eq = self.assertEqual
        # Empty input is expected back as an empty bytes object.
        eq(base64mime.body_encode(b''), b'')
        eq(base64mime.body_encode(b'hello'), 'aGVsbG8=\n')
        # A newline in the input is encoded like any other byte.
        eq(base64mime.body_encode(b'hello\n'), 'aGVsbG8K\n')
        # Test the maxlinelen arg
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40), """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IA==
""")
        # Test the eol argument
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IA==\r
""")

    def test_header_encode(self):
        eq = self.assertEqual
        he = base64mime.header_encode
        eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        # Line breaks inside the header text are encoded, not interpreted.
        eq(he('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # Test the charset option
        eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003407
3408
Ezio Melottib3aedd42010-11-20 19:04:17 +00003409
class TestQuopri(unittest.TestCase):
    # Tests for the helpers in email.quoprimime.

    def setUp(self):
        # Set of characters (as byte integers) that don't need to be encoded
        # in headers.
        self.hlit = list(chain(
            range(ord('a'), ord('z') + 1),
            range(ord('A'), ord('Z') + 1),
            range(ord('0'), ord('9') + 1),
            b'!*+-/'))
        # Set of characters (as byte integers) that do need to be encoded in
        # headers.
        self.hnon = [c for c in range(256) if c not in self.hlit]
        # Use real assertions for the sanity checks: bare ``assert``
        # statements are stripped when the suite runs under -O.
        self.assertEqual(len(self.hlit) + len(self.hnon), 256)
        # Set of characters (as byte integers) that don't need to be encoded
        # in bodies.
        self.blit = list(range(ord(' '), ord('~') + 1))
        self.blit.append(ord('\t'))
        self.blit.remove(ord('='))
        # Set of characters (as byte integers) that do need to be encoded in
        # bodies.
        self.bnon = [c for c in range(256) if c not in self.blit]
        self.assertEqual(len(self.blit) + len(self.bnon), 256)

    def test_quopri_header_check(self):
        for c in self.hlit:
            self.assertFalse(quoprimime.header_check(c),
                        'Should not be header quopri encoded: %s' % chr(c))
        for c in self.hnon:
            self.assertTrue(quoprimime.header_check(c),
                            'Should be header quopri encoded: %s' % chr(c))

    def test_quopri_body_check(self):
        for c in self.blit:
            self.assertFalse(quoprimime.body_check(c),
                        'Should not be body quopri encoded: %s' % chr(c))
        for c in self.bnon:
            self.assertTrue(quoprimime.body_check(c),
                            'Should be body quopri encoded: %s' % chr(c))

    def test_header_quopri_len(self):
        eq = self.assertEqual
        eq(quoprimime.header_length(b'hello'), 5)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
           quoprimime.header_length(b'hello') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
           quoprimime.header_length(b'h@e@l@l@o@') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        for c in self.hlit:
            eq(quoprimime.header_length(bytes([c])), 1,
               'expected length 1 for %r' % chr(c))
        for c in self.hnon:
            # Space is special; it's encoded to _
            if c == ord(' '):
                continue
            eq(quoprimime.header_length(bytes([c])), 3,
               'expected length 3 for %r' % chr(c))
        eq(quoprimime.header_length(b' '), 1)

    def test_body_quopri_len(self):
        eq = self.assertEqual
        for c in self.blit:
            eq(quoprimime.body_length(bytes([c])), 1)
        for c in self.bnon:
            eq(quoprimime.body_length(bytes([c])), 3)

    def test_quote_unquote_idempotent(self):
        for x in range(256):
            c = chr(x)
            self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)

    def _test_header_encode(self, header, expected_encoded_header, charset=None):
        # Helper: encode *header* (with header_encode's default charset
        # when *charset* is None) and compare with the expected string.
        if charset is None:
            encoded_header = quoprimime.header_encode(header)
        else:
            encoded_header = quoprimime.header_encode(header, charset)
        self.assertEqual(encoded_header, expected_encoded_header)

    def test_header_encode_null(self):
        self._test_header_encode(b'', '')

    def test_header_encode_one_word(self):
        self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=')

    def test_header_encode_two_lines(self):
        self._test_header_encode(b'hello\nworld',
                                 '=?iso-8859-1?q?hello=0Aworld?=')

    def test_header_encode_non_ascii(self):
        self._test_header_encode(b'hello\xc7there',
                                 '=?iso-8859-1?q?hello=C7there?=')

    def test_header_encode_alt_charset(self):
        self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
                                 charset='iso-8859-2')

    def _test_header_decode(self, encoded_header, expected_decoded_header):
        # Helper: decode *encoded_header* and compare with the expectation.
        decoded_header = quoprimime.header_decode(encoded_header)
        self.assertEqual(decoded_header, expected_decoded_header)

    def test_header_decode_null(self):
        self._test_header_decode('', '')

    def test_header_decode_one_word(self):
        self._test_header_decode('hello', 'hello')

    def test_header_decode_two_lines(self):
        self._test_header_decode('hello=0Aworld', 'hello\nworld')

    def test_header_decode_non_ascii(self):
        self._test_header_decode('hello=C7there', 'hello\xc7there')

    def _test_decode(self, encoded, expected_decoded, eol=None):
        # Helper: decode *encoded* (with decode's default eol when *eol* is
        # None) and compare with the expectation.
        if eol is None:
            decoded = quoprimime.decode(encoded)
        else:
            decoded = quoprimime.decode(encoded, eol=eol)
        self.assertEqual(decoded, expected_decoded)

    def test_decode_null_word(self):
        self._test_decode('', '')

    def test_decode_null_line_null_word(self):
        self._test_decode('\r\n', '\n')

    def test_decode_one_word(self):
        self._test_decode('hello', 'hello')

    def test_decode_one_word_eol(self):
        self._test_decode('hello', 'hello', eol='X')

    def test_decode_one_line(self):
        self._test_decode('hello\r\n', 'hello\n')

    def test_decode_one_line_lf(self):
        self._test_decode('hello\n', 'hello\n')

    def test_decode_one_line_cr(self):
        self._test_decode('hello\r', 'hello\n')

    def test_decode_one_line_nl(self):
        self._test_decode('hello\n', 'helloX', eol='X')

    def test_decode_one_line_crnl(self):
        self._test_decode('hello\r\n', 'helloX', eol='X')

    def test_decode_one_line_one_word(self):
        self._test_decode('hello\r\nworld', 'hello\nworld')

    def test_decode_one_line_one_word_eol(self):
        self._test_decode('hello\r\nworld', 'helloXworld', eol='X')

    def test_decode_two_lines(self):
        self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n')

    def test_decode_two_lines_eol(self):
        self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X')

    def test_decode_one_long_line(self):
        self._test_decode('Spam' * 250, 'Spam' * 250)

    def test_decode_one_space(self):
        self._test_decode(' ', '')

    def test_decode_multiple_spaces(self):
        self._test_decode(' ' * 5, '')

    def test_decode_one_line_trailing_spaces(self):
        self._test_decode('hello \r\n', 'hello\n')

    def test_decode_two_lines_trailing_spaces(self):
        self._test_decode('hello \r\nworld \r\n', 'hello\nworld\n')

    def test_decode_quoted_word(self):
        self._test_decode('=22quoted=20words=22', '"quoted words"')

    def test_decode_uppercase_quoting(self):
        self._test_decode('ab=CD=EF', 'ab\xcd\xef')

    def test_decode_lowercase_quoting(self):
        self._test_decode('ab=cd=ef', 'ab\xcd\xef')

    def test_decode_soft_line_break(self):
        self._test_decode('soft line=\r\nbreak', 'soft linebreak')

    def test_decode_false_quoting(self):
        self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2')

    def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
        # Helper: encode *body*, passing maxlinelen/eol through only when
        # the caller supplied them, compare with the expectation and, for
        # the two line endings we can split on, check the line length limit.
        kwargs = {}
        if maxlinelen is None:
            # Use body_encode's default.
            maxlinelen = 76
        else:
            kwargs['maxlinelen'] = maxlinelen
        if eol is None:
            # Use body_encode's default.
            eol = '\n'
        else:
            kwargs['eol'] = eol
        encoded_body = quoprimime.body_encode(body, **kwargs)
        self.assertEqual(encoded_body, expected_encoded_body)
        if eol == '\n' or eol == '\r\n':
            # We know how to split the result back into lines, so maxlinelen
            # can be checked.
            for line in encoded_body.splitlines():
                self.assertLessEqual(len(line), maxlinelen)

    def test_encode_null(self):
        self._test_encode('', '')

    def test_encode_null_lines(self):
        self._test_encode('\n\n', '\n\n')

    def test_encode_one_line(self):
        self._test_encode('hello\n', 'hello\n')

    def test_encode_one_line_crlf(self):
        self._test_encode('hello\r\n', 'hello\n')

    def test_encode_one_line_eol(self):
        self._test_encode('hello\n', 'hello\r\n', eol='\r\n')

    def test_encode_one_space(self):
        self._test_encode(' ', '=20')

    def test_encode_one_line_one_space(self):
        self._test_encode(' \n', '=20\n')

    # XXX: body_encode() expects strings, but uses ord(char) from these
    # strings to index into a 256-entry list.  For code points above 255,
    # this will fail.  Should there be a check for 8-bit only ord() values
    # in body, or at least a comment about the expected input?

    def test_encode_two_lines_one_space(self):
        self._test_encode(' \n \n', '=20\n=20\n')

    def test_encode_one_word_trailing_spaces(self):
        # Only the last whitespace character of a line is quoted, so two
        # trailing spaces are needed to get a literal space before '=20'.
        self._test_encode('hello  ', 'hello =20')

    def test_encode_one_line_trailing_spaces(self):
        # As above, with a line terminator after the trailing spaces.
        self._test_encode('hello  \n', 'hello =20\n')

    def test_encode_one_word_trailing_tab(self):
        self._test_encode('hello \t', 'hello =09')

    def test_encode_one_line_trailing_tab(self):
        self._test_encode('hello \t\n', 'hello =09\n')

    def test_encode_trailing_space_before_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6)

    def test_encode_trailing_space_at_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5)

    def test_encode_trailing_space_beyond_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4)

    def test_encode_whitespace_lines(self):
        self._test_encode(' \n' * 5, '=20\n' * 5)

    def test_encode_quoted_equals(self):
        self._test_encode('a = b', 'a =3D b')

    def test_encode_one_long_string(self):
        self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25)

    # Defined once only: a second method with this name would silently
    # replace the first in the class namespace.
    def test_encode_one_long_line(self):
        self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')

    def test_encode_one_very_long_line(self):
        self._test_encode('x' * 200 + '\n',
                          2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n')

    def test_encode_shortest_maxlinelen(self):
        self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4)

    def test_encode_maxlinelen_too_small(self):
        self.assertRaises(ValueError, self._test_encode, '', '', maxlinelen=3)

    def test_encode(self):
        eq = self.assertEqual
        eq(quoprimime.body_encode(''), '')
        eq(quoprimime.body_encode('hello'), 'hello')
        # CRLF in the input comes out as the default eol.
        eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
        # Test the maxlinelen arg
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
x xxxx xxxx xxxx xxxx=20""")
        # Test the eol argument
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
x xxxx xxxx xxxx xxxx=20""")
        eq(quoprimime.body_encode("""\
one line

two line"""), """\
one line

two line""")
3722
3723
Ezio Melottib3aedd42010-11-20 19:04:17 +00003724
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003725# Test the Charset class
class TestCharset(unittest.TestCase):
    def tearDown(self):
        # test_body_encode registers a throwaway 'fake' charset; make sure
        # it never outlives a test, whether or not it was registered.
        from email import charset as CharsetModule
        CharsetModule.CHARSETS.pop('fake', None)

    def test_codec_encodeable(self):
        check = self.assertEqual
        # Make sure us-ascii = no Unicode conversion
        ascii_charset = Charset('us-ascii')
        check(ascii_charset.header_encode('Hello World!'), 'Hello World!')
        # Test 8-bit idempotency with us-ascii
        s = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
        self.assertRaises(UnicodeError, ascii_charset.header_encode, s)
        utf8_charset = Charset('utf-8')
        check(utf8_charset.header_encode(s),
              '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')

    def test_body_encode(self):
        check = self.assertEqual
        # Try a charset with QP body encoding
        qp_charset = Charset('iso-8859-1')
        check('hello w=F6rld', qp_charset.body_encode('hello w\xf6rld'))
        # Try a charset with Base64 body encoding
        b64_charset = Charset('utf-8')
        check('aGVsbG8gd29ybGQ=\n', b64_charset.body_encode(b'hello world'))
        # Try a charset with None body encoding
        plain_charset = Charset('us-ascii')
        check('hello world', plain_charset.body_encode('hello world'))
        # Try the convert argument, where input codec != output codec
        c = Charset('euc-jp')
        # With apologies to Tokio Kikuchi ;)
        # XXX FIXME
##         try:
##             eq('\x1b$B5FCO;~IW\x1b(B',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
##             eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
##         except LookupError:
##             # We probably don't have the Japanese codecs installed
##             pass
        # Testing SF bug #625509, which we have to fake, since there are no
        # built-in encodings where the header encoding is QP but the body
        # encoding is not.
        from email import charset as CharsetModule
        CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8')
        fake_charset = Charset('fake')
        check('hello world', fake_charset.body_encode('hello world'))

    def test_unicode_charset_name(self):
        # A plain ASCII name is accepted and round-trips through str(); a
        # name containing a non-ASCII character is rejected.
        ascii_charset = Charset('us-ascii')
        self.assertEqual(str(ascii_charset), 'us-ascii')
        self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
3779 self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
3780
3781
Ezio Melottib3aedd42010-11-20 19:04:17 +00003782
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003783# Test multilingual MIME headers.
3784class TestHeader(TestEmailBase):
3785 def test_simple(self):
3786 eq = self.ndiffAssertEqual
3787 h = Header('Hello World!')
3788 eq(h.encode(), 'Hello World!')
3789 h.append(' Goodbye World!')
3790 eq(h.encode(), 'Hello World! Goodbye World!')
3791
3792 def test_simple_surprise(self):
3793 eq = self.ndiffAssertEqual
3794 h = Header('Hello World!')
3795 eq(h.encode(), 'Hello World!')
3796 h.append('Goodbye World!')
3797 eq(h.encode(), 'Hello World! Goodbye World!')
3798
3799 def test_header_needs_no_decoding(self):
3800 h = 'no decoding needed'
3801 self.assertEqual(decode_header(h), [(h, None)])
3802
3803 def test_long(self):
3804 h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
3805 maxlinelen=76)
3806 for l in h.encode(splitchars=' ').split('\n '):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003807 self.assertTrue(len(l) <= 76)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003808
def test_multilingual(self):
    # Build one header out of three chunks in three different charsets,
    # then check its RFC 2047 encoding, the result of decoding that
    # encoding again, its str() form and a make_header() round trip.
    eq = self.ndiffAssertEqual
    g = Charset("iso-8859-1")
    cz = Charset("iso-8859-2")
    utf8 = Charset("utf-8")
    # The first two chunks are byte strings in their respective charsets;
    # the third is a str.
    g_head = (b'Die Mieter treten hier ein werden mit einem '
              b'Foerderband komfortabel den Korridor entlang, '
              b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
              b'gegen die rotierenden Klingen bef\xf6rdert. ')
    cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
               b'd\xf9vtipu.. ')
    utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                 '\u3044\u307e\u3059\u3002')
    h = Header(g_head, g)
    h.append(cz_head, cz)
    h.append(utf8_head, utf8)
    enc = h.encode(maxlinelen=76)
    # Expected wire form: the two ISO-8859 chunks as 'q' encoded words, the
    # UTF-8 chunk as 'b' encoded words; continuation lines start with a
    # single space.
    eq(enc, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
    # Decoding must merge adjacent encoded words of the same charset back
    # into exactly the three original chunks.
    decoded = decode_header(enc)
    eq(len(decoded), 3)
    eq(decoded[0], (g_head, 'iso-8859-1'))
    eq(decoded[1], (cz_head, 'iso-8859-2'))
    eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
    # str() of the header is the concatenated text; the expectation is
    # spelled as UTF-8 bytes and decoded for the comparison.
    ustr = str(h)
    eq(ustr,
       (b'Die Mieter treten hier ein werden mit einem Foerderband '
        b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
        b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
        b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
        b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
        b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
        b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
        b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
        b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
        b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
        b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
        b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
        b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
        b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
        b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
        b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
        ).decode('utf-8'))
    # Test make_header()
    newh = make_header(decode_header(enc))
    eq(newh, h)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003871
3872 def test_empty_header_encode(self):
3873 h = Header()
3874 self.assertEqual(h.encode(), '')
Barry Warsaw8b3d6592007-08-30 02:10:49 +00003875
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003876 def test_header_ctor_default_args(self):
3877 eq = self.ndiffAssertEqual
3878 h = Header()
3879 eq(h, '')
3880 h.append('foo', Charset('iso-8859-1'))
Guido van Rossum9604e662007-08-30 03:46:43 +00003881 eq(h, 'foo')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003882
3883 def test_explicit_maxlinelen(self):
3884 eq = self.ndiffAssertEqual
3885 hstr = ('A very long line that must get split to something other '
3886 'than at the 76th character boundary to test the non-default '
3887 'behavior')
3888 h = Header(hstr)
3889 eq(h.encode(), '''\
3890A very long line that must get split to something other than at the 76th
3891 character boundary to test the non-default behavior''')
3892 eq(str(h), hstr)
3893 h = Header(hstr, header_name='Subject')
3894 eq(h.encode(), '''\
3895A very long line that must get split to something other than at the
3896 76th character boundary to test the non-default behavior''')
3897 eq(str(h), hstr)
3898 h = Header(hstr, maxlinelen=1024, header_name='Subject')
3899 eq(h.encode(), hstr)
3900 eq(str(h), hstr)
3901
Guido van Rossum9604e662007-08-30 03:46:43 +00003902 def test_quopri_splittable(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003903 eq = self.ndiffAssertEqual
3904 h = Header(charset='iso-8859-1', maxlinelen=20)
Guido van Rossum9604e662007-08-30 03:46:43 +00003905 x = 'xxxx ' * 20
3906 h.append(x)
3907 s = h.encode()
3908 eq(s, """\
3909=?iso-8859-1?q?xxx?=
3910 =?iso-8859-1?q?x_?=
3911 =?iso-8859-1?q?xx?=
3912 =?iso-8859-1?q?xx?=
3913 =?iso-8859-1?q?_x?=
3914 =?iso-8859-1?q?xx?=
3915 =?iso-8859-1?q?x_?=
3916 =?iso-8859-1?q?xx?=
3917 =?iso-8859-1?q?xx?=
3918 =?iso-8859-1?q?_x?=
3919 =?iso-8859-1?q?xx?=
3920 =?iso-8859-1?q?x_?=
3921 =?iso-8859-1?q?xx?=
3922 =?iso-8859-1?q?xx?=
3923 =?iso-8859-1?q?_x?=
3924 =?iso-8859-1?q?xx?=
3925 =?iso-8859-1?q?x_?=
3926 =?iso-8859-1?q?xx?=
3927 =?iso-8859-1?q?xx?=
3928 =?iso-8859-1?q?_x?=
3929 =?iso-8859-1?q?xx?=
3930 =?iso-8859-1?q?x_?=
3931 =?iso-8859-1?q?xx?=
3932 =?iso-8859-1?q?xx?=
3933 =?iso-8859-1?q?_x?=
3934 =?iso-8859-1?q?xx?=
3935 =?iso-8859-1?q?x_?=
3936 =?iso-8859-1?q?xx?=
3937 =?iso-8859-1?q?xx?=
3938 =?iso-8859-1?q?_x?=
3939 =?iso-8859-1?q?xx?=
3940 =?iso-8859-1?q?x_?=
3941 =?iso-8859-1?q?xx?=
3942 =?iso-8859-1?q?xx?=
3943 =?iso-8859-1?q?_x?=
3944 =?iso-8859-1?q?xx?=
3945 =?iso-8859-1?q?x_?=
3946 =?iso-8859-1?q?xx?=
3947 =?iso-8859-1?q?xx?=
3948 =?iso-8859-1?q?_x?=
3949 =?iso-8859-1?q?xx?=
3950 =?iso-8859-1?q?x_?=
3951 =?iso-8859-1?q?xx?=
3952 =?iso-8859-1?q?xx?=
3953 =?iso-8859-1?q?_x?=
3954 =?iso-8859-1?q?xx?=
3955 =?iso-8859-1?q?x_?=
3956 =?iso-8859-1?q?xx?=
3957 =?iso-8859-1?q?xx?=
3958 =?iso-8859-1?q?_?=""")
3959 eq(x, str(make_header(decode_header(s))))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003960 h = Header(charset='iso-8859-1', maxlinelen=40)
3961 h.append('xxxx ' * 20)
Guido van Rossum9604e662007-08-30 03:46:43 +00003962 s = h.encode()
3963 eq(s, """\
3964=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
3965 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
3966 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
3967 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
3968 =?iso-8859-1?q?_xxxx_xxxx_?=""")
3969 eq(x, str(make_header(decode_header(s))))
3970
3971 def test_base64_splittable(self):
3972 eq = self.ndiffAssertEqual
3973 h = Header(charset='koi8-r', maxlinelen=20)
3974 x = 'xxxx ' * 20
3975 h.append(x)
3976 s = h.encode()
3977 eq(s, """\
3978=?koi8-r?b?eHh4?=
3979 =?koi8-r?b?eCB4?=
3980 =?koi8-r?b?eHh4?=
3981 =?koi8-r?b?IHh4?=
3982 =?koi8-r?b?eHgg?=
3983 =?koi8-r?b?eHh4?=
3984 =?koi8-r?b?eCB4?=
3985 =?koi8-r?b?eHh4?=
3986 =?koi8-r?b?IHh4?=
3987 =?koi8-r?b?eHgg?=
3988 =?koi8-r?b?eHh4?=
3989 =?koi8-r?b?eCB4?=
3990 =?koi8-r?b?eHh4?=
3991 =?koi8-r?b?IHh4?=
3992 =?koi8-r?b?eHgg?=
3993 =?koi8-r?b?eHh4?=
3994 =?koi8-r?b?eCB4?=
3995 =?koi8-r?b?eHh4?=
3996 =?koi8-r?b?IHh4?=
3997 =?koi8-r?b?eHgg?=
3998 =?koi8-r?b?eHh4?=
3999 =?koi8-r?b?eCB4?=
4000 =?koi8-r?b?eHh4?=
4001 =?koi8-r?b?IHh4?=
4002 =?koi8-r?b?eHgg?=
4003 =?koi8-r?b?eHh4?=
4004 =?koi8-r?b?eCB4?=
4005 =?koi8-r?b?eHh4?=
4006 =?koi8-r?b?IHh4?=
4007 =?koi8-r?b?eHgg?=
4008 =?koi8-r?b?eHh4?=
4009 =?koi8-r?b?eCB4?=
4010 =?koi8-r?b?eHh4?=
4011 =?koi8-r?b?IA==?=""")
4012 eq(x, str(make_header(decode_header(s))))
4013 h = Header(charset='koi8-r', maxlinelen=40)
4014 h.append(x)
4015 s = h.encode()
4016 eq(s, """\
4017=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
4018 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
4019 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
4020 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
4021 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
4022 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
4023 eq(x, str(make_header(decode_header(s))))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004024
4025 def test_us_ascii_header(self):
4026 eq = self.assertEqual
4027 s = 'hello'
4028 x = decode_header(s)
4029 eq(x, [('hello', None)])
4030 h = make_header(x)
4031 eq(s, h.encode())
4032
4033 def test_string_charset(self):
4034 eq = self.assertEqual
4035 h = Header()
4036 h.append('hello', 'iso-8859-1')
Guido van Rossum9604e662007-08-30 03:46:43 +00004037 eq(h, 'hello')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004038
4039## def test_unicode_error(self):
4040## raises = self.assertRaises
4041## raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
4042## raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
4043## h = Header()
4044## raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
4045## raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
4046## raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')
4047
4048 def test_utf8_shortest(self):
4049 eq = self.assertEqual
4050 h = Header('p\xf6stal', 'utf-8')
4051 eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
4052 h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
4053 eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')
4054
4055 def test_bad_8bit_header(self):
4056 raises = self.assertRaises
4057 eq = self.assertEqual
4058 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4059 raises(UnicodeError, Header, x)
4060 h = Header()
4061 raises(UnicodeError, h.append, x)
4062 e = x.decode('utf-8', 'replace')
4063 eq(str(Header(x, errors='replace')), e)
4064 h.append(x, errors='replace')
4065 eq(str(h), e)
4066
R David Murray041015c2011-03-25 15:10:55 -04004067 def test_escaped_8bit_header(self):
4068 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4069 x = x.decode('ascii', 'surrogateescape')
4070 h = Header(x, charset=email.charset.UNKNOWN8BIT)
4071 self.assertEqual(str(h),
4072 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
4073 self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])
4074
4075 def test_modify_returned_list_does_not_change_header(self):
4076 h = Header('test')
4077 chunks = email.header.decode_header(h)
4078 chunks.append(('ascii', 'test2'))
4079 self.assertEqual(str(h), 'test')
4080
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004081 def test_encoded_adjacent_nonencoded(self):
4082 eq = self.assertEqual
4083 h = Header()
4084 h.append('hello', 'iso-8859-1')
4085 h.append('world')
4086 s = h.encode()
4087 eq(s, '=?iso-8859-1?q?hello?= world')
4088 h = make_header(decode_header(s))
4089 eq(h.encode(), s)
4090
4091 def test_whitespace_eater(self):
4092 eq = self.assertEqual
4093 s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
4094 parts = decode_header(s)
4095 eq(parts, [(b'Subject:', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b'zz.', None)])
4096 hdr = make_header(parts)
4097 eq(hdr.encode(),
4098 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')
4099
4100 def test_broken_base64_header(self):
4101 raises = self.assertRaises
R. David Murrayc4e69cc2010-08-03 22:14:10 +00004102 s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004103 raises(errors.HeaderParseError, decode_header, s)
4104
R. David Murray477efb32011-01-05 01:39:32 +00004105 def test_shift_jis_charset(self):
4106 h = Header('文', charset='shift_jis')
4107 self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')
4108
R David Murrayde912762011-03-16 18:26:23 -04004109 def test_flatten_header_with_no_value(self):
4110 # Issue 11401 (regression from email 4.x) Note that the space after
4111 # the header doesn't reflect the input, but this is also the way
4112 # email 4.x behaved. At some point it would be nice to fix that.
4113 msg = email.message_from_string("EmptyHeader:")
4114 self.assertEqual(str(msg), "EmptyHeader: \n\n")
4115
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004116
Ezio Melottib3aedd42010-11-20 19:04:17 +00004117
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004118# Test RFC 2231 header parameters (en/de)coding
class TestRFC2231(TestEmailBase):
    """Tests for RFC 2231 parameter handling on Message objects."""

    def test_get_param(self):
        """get_param() returns a (charset, language, value) triple."""
        check = self.assertEqual
        msg = self._msgobj('msg_29.txt')
        check(msg.get_param('title'),
              ('us-ascii', 'en', "This is even more ***fun*** isn't it!"))
        check(msg.get_param('title', unquote=False),
              ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))

    def test_set_param(self):
        """set_param() with a charset round-trips and flattens as title*=."""
        check = self.ndiffAssertEqual
        title = "This is even more ***fun*** isn't it!"

        msg = Message()
        msg.set_param('title', title, charset='us-ascii')
        check(msg.get_param('title'), ('us-ascii', '', title))
        msg.set_param('title', title, charset='us-ascii', language='en')
        check(msg.get_param('title'), ('us-ascii', 'en', title))

        msg = self._msgobj('msg_01.txt')
        msg.set_param('title', title, charset='us-ascii', language='en')
        check(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    def test_set_param_requote(self):
        """requote=False drops quotes except around values with tspecials."""
        msg = Message()
        msg.set_param('title', 'foo')
        self.assertEqual(msg['content-type'], 'text/plain; title="foo"')
        msg.set_param('title', 'bar', requote=False)
        self.assertEqual(msg['content-type'], 'text/plain; title=bar')
        # tspecial is still quoted.
        msg.set_param('title', "(bar)bell", requote=False)
        self.assertEqual(msg['content-type'], 'text/plain; title="(bar)bell"')

    def test_del_param(self):
        """del_param() removes one parameter and leaves the other in place."""
        check = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        msg.set_param('foo', 'bar', charset='us-ascii', language='en')
        msg.set_param('title', "This is even more ***fun*** isn't it!",
                      charset='us-ascii', language='en')
        msg.del_param('foo', header='Content-Type')
        check(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    def test_rfc2231_get_content_charset(self):
        """get_content_charset() reports 'us-ascii' for msg_32.txt."""
        msg = self._msgobj('msg_32.txt')
        self.assertEqual(msg.get_content_charset(), 'us-ascii')

    def test_rfc2231_parse_rfc_quoting(self):
        """Unquoted encoded segments parse and flatten back unchanged."""
        text = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*=''This%20is%20even%20more%20;
            \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(text)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        self.assertEqual(text, msg.as_string())

    def test_rfc2231_parse_extra_quoting(self):
        """Quoted encoded segments parse and flatten back unchanged."""
        text = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*="''This%20is%20even%20more%20";
            \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(text)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        self.assertEqual(text, msg.as_string())

    def test_rfc2231_no_language_or_charset(self):
        """Continuations without charset/language yield a plain string."""
        text = '''\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm

'''
        msg = email.message_from_string(text)
        value = msg.get_param('NAME')
        self.assertNotIsInstance(value, tuple)
        self.assertEqual(
            value,
            'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')

    def test_rfc2231_no_language_or_charset_in_filename(self):
        """A filename with empty charset and language is still decoded."""
        text = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(text)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
        """Same input as the non-'encoded' variant; same expected filename."""
        text = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(text)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_partly_encoded(self):
        """Only segments flagged with a trailing '*' are percent-decoded."""
        text = '''\
Content-Disposition: inline;
\tfilename*0="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(text)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20***fun*** is it not.pdf')

    def test_rfc2231_partly_nonencoded(self):
        """Segments without a trailing '*' are joined without decoding."""
        text = '''\
Content-Disposition: inline;
\tfilename*0="This%20is%20even%20more%20";
\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(text)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_boundary(self):
        """get_boundary() decodes a boundary with empty charset/language."""
        text = '''\
Content-Type: multipart/alternative;
\tboundary*0*="''This%20is%20even%20more%20";
\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tboundary*2="is it not.pdf"

'''
        msg = email.message_from_string(text)
        self.assertEqual(msg.get_boundary(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_charset(self):
        """get_content_charset() decodes and lower-cases a split charset."""
        # This is a nonsensical charset value, but tests the code anyway
        text = '''\
Content-Type: text/plain;
\tcharset*0*="This%20is%20even%20more%20";
\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tcharset*2="is it not.pdf"

'''
        msg = email.message_from_string(text)
        self.assertEqual(msg.get_content_charset(),
                         'this is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_filename(self):
        """An unknown charset name does not prevent filename decoding."""
        text = '''\
Content-Disposition: inline;
\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(text)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_charset(self):
        """A charset declared in a bogus encoding yields None."""
        text = """\
Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D

"""
        msg = email.message_from_string(text)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertEqual(msg.get_content_charset(), None)

    def test_rfc2231_bad_character_in_charset(self):
        """A charset value containing non-ascii bytes yields None."""
        text = """\
Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D

"""
        msg = email.message_from_string(text)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertEqual(msg.get_content_charset(), None)

    def test_rfc2231_bad_character_in_filename(self):
        """An undecodable byte in the filename becomes U+FFFD."""
        text = '''\
Content-Disposition: inline;
\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2*="is it not.pdf%E2"

'''
        msg = email.message_from_string(text)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf\ufffd')

    def test_rfc2231_unknown_encoding(self):
        """An unrecognised charset label still yields the filename."""
        text = """\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt

"""
        msg = email.message_from_string(text)
        self.assertEqual(msg.get_filename(), 'myfile.txt')

    def test_rfc2231_single_tick_in_filename_extended(self):
        """One apostrophe in an extended value is not a charset delimiter."""
        check = self.assertEqual
        text = """\
Content-Type: application/x-foo;
\tname*0*=\"Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(text)
        charset, language, value = msg.get_param('name')
        check(charset, None)
        check(language, None)
        check(value, "Frank's Document")

    def test_rfc2231_single_tick_in_filename(self):
        """One apostrophe in a non-extended value gives a plain string."""
        text = """\
Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(text)
        value = msg.get_param('name')
        self.assertNotIsInstance(value, tuple)
        self.assertEqual(value, "Frank's Document")

    def test_rfc2231_tick_attack_extended(self):
        """Only the first two apostrophes delimit charset and language."""
        check = self.assertEqual
        text = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(text)
        charset, language, value = msg.get_param('name')
        check(charset, 'us-ascii')
        check(language, 'en-us')
        check(value, "Frank's Document")

    def test_rfc2231_tick_attack(self):
        """Apostrophes in a non-extended value are kept literally."""
        text = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(text)
        value = msg.get_param('name')
        self.assertNotIsInstance(value, tuple)
        self.assertEqual(value, "us-ascii'en-us'Frank's Document")

    def test_rfc2231_no_extended_values(self):
        """An ordinary quoted parameter is returned as a plain string."""
        text = """\
Content-Type: application/x-foo; name=\"Frank's Document\"

"""
        msg = email.message_from_string(text)
        self.assertEqual(msg.get_param('name'), "Frank's Document")

    def test_rfc2231_encoded_then_unencoded_segments(self):
        """An encoded first segment followed by a plain one joins cleanly."""
        check = self.assertEqual
        text = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'My\";
\tname*1=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(text)
        charset, language, value = msg.get_param('name')
        check(charset, 'us-ascii')
        check(language, 'en-us')
        check(value, 'My Document For You')

    def test_rfc2231_unencoded_then_encoded_segments(self):
        """A plain first segment followed by encoded ones joins cleanly."""
        check = self.assertEqual
        text = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'My\";
\tname*1*=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(text)
        charset, language, value = msg.get_param('name')
        check(charset, 'us-ascii')
        check(language, 'en-us')
        check(value, 'My Document For You')
4463
4464
Ezio Melottib3aedd42010-11-20 19:04:17 +00004465
R. David Murraya8f480f2010-01-16 18:30:03 +00004466# Tests to ensure that signed parts of an email are completely preserved, as
4467# required by RFC1847 section 2.1. Note that these are incomplete, because the
4468# email package does not currently always preserve the body. See issue 1670765.
class TestSigned(TestEmailBase):
    """Check that the first MIME part of msg_45.txt survives regeneration."""

    def _msg_and_obj(self, filename):
        """Return (raw text, parsed Message) for the named data file."""
        with openfile(findfile(filename)) as stream:
            raw_text = stream.read()
        parsed = email.message_from_string(raw_text)
        return raw_text, parsed

    def _signed_parts_eq(self, original, result):
        """Assert the first MIME part of *result* equals that of *original*."""
        # Extract the first mime part of each message: the text between the
        # first boundary line and the next line carrying the same boundary.
        boundary_re = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)
        expected_part = boundary_re.search(original).group(2)
        actual_part = boundary_re.search(result).group(2)
        self.assertEqual(actual_part, expected_part)

    def test_long_headers_as_string(self):
        """as_string() with default arguments preserves the first part."""
        raw_text, parsed = self._msg_and_obj('msg_45.txt')
        self._signed_parts_eq(raw_text, parsed.as_string())

    def test_long_headers_as_string_maxheaderlen(self):
        """as_string(maxheaderlen=60) preserves the first part."""
        raw_text, parsed = self._msg_and_obj('msg_45.txt')
        self._signed_parts_eq(raw_text, parsed.as_string(maxheaderlen=60))

    def test_long_headers_flatten(self):
        """Generator.flatten() preserves the first part."""
        raw_text, parsed = self._msg_and_obj('msg_45.txt')
        buffer = StringIO()
        Generator(buffer).flatten(parsed)
        self._signed_parts_eq(raw_text, buffer.getvalue())
4501
4502
Ezio Melottib3aedd42010-11-20 19:04:17 +00004503
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004504def _testclasses():
4505 mod = sys.modules[__name__]
4506 return [getattr(mod, name) for name in dir(mod) if name.startswith('Test')]
4507
4508
def suite():
    """Build one TestSuite holding the tests of every class from _testclasses().

    Uses ``unittest.defaultTestLoader.loadTestsFromTestCase`` rather than
    ``unittest.makeSuite``: makeSuite is a thin wrapper around the same
    loader call, was deprecated in Python 3.11 and removed in 3.13.

    Returns:
        A ``unittest.TestSuite`` with one sub-suite per collected class.
    """
    # Named 'tests' so the local does not shadow this function's own name.
    tests = unittest.TestSuite()
    load = unittest.defaultTestLoader.loadTestsFromTestCase
    for testclass in _testclasses():
        tests.addTest(load(testclass))
    return tests
4514
4515
def test_main():
    """Run each class returned by _testclasses() through run_unittest()."""
    collected = _testclasses()
    for case_class in collected:
        run_unittest(case_class)
4519
4520
Ezio Melottib3aedd42010-11-20 19:04:17 +00004521
if __name__ == '__main__':
    # Executed as a script: hand the module-level suite() to unittest's runner.
    unittest.main(defaultTest='suite')