blob: f43bb38aa8d52a042b375e3fd1f79b89a1303b7d [file] [log] [blame]
# Copyright (C) 2001-2010 Python Software Foundation
# Contact: email-sig@python.org
# email package unit tests
4
5import os
R. David Murray719a4492010-11-21 16:53:48 +00006import re
Guido van Rossum8b3febe2007-08-30 01:15:14 +00007import sys
8import time
9import base64
10import difflib
11import unittest
12import warnings
R. David Murray96fd54e2010-10-08 15:55:28 +000013import textwrap
Guido van Rossum8b3febe2007-08-30 01:15:14 +000014
R. David Murray96fd54e2010-10-08 15:55:28 +000015from io import StringIO, BytesIO
Guido van Rossum8b3febe2007-08-30 01:15:14 +000016from itertools import chain
17
18import email
19
20from email.charset import Charset
21from email.header import Header, decode_header, make_header
22from email.parser import Parser, HeaderParser
23from email.generator import Generator, DecodedGenerator
24from email.message import Message
25from email.mime.application import MIMEApplication
26from email.mime.audio import MIMEAudio
27from email.mime.text import MIMEText
28from email.mime.image import MIMEImage
29from email.mime.base import MIMEBase
30from email.mime.message import MIMEMessage
31from email.mime.multipart import MIMEMultipart
32from email import utils
33from email import errors
34from email import encoders
35from email import iterators
36from email import base64mime
37from email import quoprimime
38
R. David Murray96fd54e2010-10-08 15:55:28 +000039from test.support import findfile, run_unittest, unlink
Guido van Rossum8b3febe2007-08-30 01:15:14 +000040from email.test import __file__ as landmark
41
42
# Small string constants shared by the tests in this module.
NL = '\n'          # line separator used when joining/splitting text
EMPTYSTRING = ''   # the empty string
SPACE = ' '        # a single blank
46
47
Ezio Melottib3aedd42010-11-20 19:04:17 +000048
def openfile(filename, *args, **kws):
    """Open *filename* from the ``data`` directory next to the test package.

    The directory is located relative to ``landmark``; all remaining
    positional and keyword arguments are handed to ``open()`` unchanged.
    """
    data_dir = os.path.join(os.path.dirname(landmark), 'data')
    return open(os.path.join(data_dir, filename), *args, **kws)
52
53
Ezio Melottib3aedd42010-11-20 19:04:17 +000054
# Base test class
class TestEmailBase(unittest.TestCase):
    def ndiffAssertEqual(self, first, second):
        """Assert equality, reporting a line-by-line ndiff on mismatch."""
        if not (first != second):
            return
        # Diff the repr of each line so that whitespace and escape
        # differences are visible in the failure message.
        left = list(map(repr, str(first).splitlines()))
        right = list(map(repr, str(second).splitlines()))
        delta = difflib.ndiff(left, right)
        raise self.failureException(NL + NL.join(delta))

    def _msgobj(self, filename):
        # Parse the named test data file into a message object.
        source = openfile(findfile(filename))
        with source:
            return email.message_from_file(source)
70
71
Ezio Melottib3aedd42010-11-20 19:04:17 +000072
Guido van Rossum8b3febe2007-08-30 01:15:14 +000073# Test various aspects of the Message class's API
74class TestMessageAPI(TestEmailBase):
75 def test_get_all(self):
76 eq = self.assertEqual
77 msg = self._msgobj('msg_20.txt')
78 eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
79 eq(msg.get_all('xx', 'n/a'), 'n/a')
80
R. David Murraye5db2632010-11-20 15:10:13 +000081 def test_getset_charset(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +000082 eq = self.assertEqual
83 msg = Message()
84 eq(msg.get_charset(), None)
85 charset = Charset('iso-8859-1')
86 msg.set_charset(charset)
87 eq(msg['mime-version'], '1.0')
88 eq(msg.get_content_type(), 'text/plain')
89 eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
90 eq(msg.get_param('charset'), 'iso-8859-1')
91 eq(msg['content-transfer-encoding'], 'quoted-printable')
92 eq(msg.get_charset().input_charset, 'iso-8859-1')
93 # Remove the charset
94 msg.set_charset(None)
95 eq(msg.get_charset(), None)
96 eq(msg['content-type'], 'text/plain')
97 # Try adding a charset when there's already MIME headers present
98 msg = Message()
99 msg['MIME-Version'] = '2.0'
100 msg['Content-Type'] = 'text/x-weird'
101 msg['Content-Transfer-Encoding'] = 'quinted-puntable'
102 msg.set_charset(charset)
103 eq(msg['mime-version'], '2.0')
104 eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
105 eq(msg['content-transfer-encoding'], 'quinted-puntable')
106
107 def test_set_charset_from_string(self):
108 eq = self.assertEqual
109 msg = Message()
110 msg.set_charset('us-ascii')
111 eq(msg.get_charset().input_charset, 'us-ascii')
112 eq(msg['content-type'], 'text/plain; charset="us-ascii"')
113
114 def test_set_payload_with_charset(self):
115 msg = Message()
116 charset = Charset('iso-8859-1')
117 msg.set_payload('This is a string payload', charset)
118 self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')
119
120 def test_get_charsets(self):
121 eq = self.assertEqual
122
123 msg = self._msgobj('msg_08.txt')
124 charsets = msg.get_charsets()
125 eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])
126
127 msg = self._msgobj('msg_09.txt')
128 charsets = msg.get_charsets('dingbat')
129 eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
130 'koi8-r'])
131
132 msg = self._msgobj('msg_12.txt')
133 charsets = msg.get_charsets()
134 eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
135 'iso-8859-3', 'us-ascii', 'koi8-r'])
136
137 def test_get_filename(self):
138 eq = self.assertEqual
139
140 msg = self._msgobj('msg_04.txt')
141 filenames = [p.get_filename() for p in msg.get_payload()]
142 eq(filenames, ['msg.txt', 'msg.txt'])
143
144 msg = self._msgobj('msg_07.txt')
145 subpart = msg.get_payload(1)
146 eq(subpart.get_filename(), 'dingusfish.gif')
147
148 def test_get_filename_with_name_parameter(self):
149 eq = self.assertEqual
150
151 msg = self._msgobj('msg_44.txt')
152 filenames = [p.get_filename() for p in msg.get_payload()]
153 eq(filenames, ['msg.txt', 'msg.txt'])
154
155 def test_get_boundary(self):
156 eq = self.assertEqual
157 msg = self._msgobj('msg_07.txt')
158 # No quotes!
159 eq(msg.get_boundary(), 'BOUNDARY')
160
161 def test_set_boundary(self):
162 eq = self.assertEqual
163 # This one has no existing boundary parameter, but the Content-Type:
164 # header appears fifth.
165 msg = self._msgobj('msg_01.txt')
166 msg.set_boundary('BOUNDARY')
167 header, value = msg.items()[4]
168 eq(header.lower(), 'content-type')
169 eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
170 # This one has a Content-Type: header, with a boundary, stuck in the
171 # middle of its headers. Make sure the order is preserved; it should
172 # be fifth.
173 msg = self._msgobj('msg_04.txt')
174 msg.set_boundary('BOUNDARY')
175 header, value = msg.items()[4]
176 eq(header.lower(), 'content-type')
177 eq(value, 'multipart/mixed; boundary="BOUNDARY"')
178 # And this one has no Content-Type: header at all.
179 msg = self._msgobj('msg_03.txt')
180 self.assertRaises(errors.HeaderParseError,
181 msg.set_boundary, 'BOUNDARY')
182
R. David Murray73a559d2010-12-21 18:07:59 +0000183 def test_make_boundary(self):
184 msg = MIMEMultipart('form-data')
185 # Note that when the boundary gets created is an implementation
186 # detail and might change.
187 self.assertEqual(msg.items()[0][1], 'multipart/form-data')
188 # Trigger creation of boundary
189 msg.as_string()
190 self.assertEqual(msg.items()[0][1][:33],
191 'multipart/form-data; boundary="==')
192 # XXX: there ought to be tests of the uniqueness of the boundary, too.
193
R. David Murray57c45ac2010-02-21 04:39:40 +0000194 def test_message_rfc822_only(self):
195 # Issue 7970: message/rfc822 not in multipart parsed by
196 # HeaderParser caused an exception when flattened.
Brett Cannon384917a2010-10-29 23:08:36 +0000197 with openfile(findfile('msg_46.txt')) as fp:
198 msgdata = fp.read()
R. David Murray57c45ac2010-02-21 04:39:40 +0000199 parser = HeaderParser()
200 msg = parser.parsestr(msgdata)
201 out = StringIO()
202 gen = Generator(out, True, 0)
203 gen.flatten(msg, False)
204 self.assertEqual(out.getvalue(), msgdata)
205
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000206 def test_get_decoded_payload(self):
207 eq = self.assertEqual
208 msg = self._msgobj('msg_10.txt')
209 # The outer message is a multipart
210 eq(msg.get_payload(decode=True), None)
211 # Subpart 1 is 7bit encoded
212 eq(msg.get_payload(0).get_payload(decode=True),
213 b'This is a 7bit encoded message.\n')
214 # Subpart 2 is quopri
215 eq(msg.get_payload(1).get_payload(decode=True),
216 b'\xa1This is a Quoted Printable encoded message!\n')
217 # Subpart 3 is base64
218 eq(msg.get_payload(2).get_payload(decode=True),
219 b'This is a Base64 encoded message.')
R. David Murray57a4b982010-03-08 02:17:03 +0000220 # Subpart 4 is base64 with a trailing newline, which
221 # used to be stripped (issue 7143).
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000222 eq(msg.get_payload(3).get_payload(decode=True),
R. David Murray57a4b982010-03-08 02:17:03 +0000223 b'This is a Base64 encoded message.\n')
224 # Subpart 5 has no Content-Transfer-Encoding: header.
225 eq(msg.get_payload(4).get_payload(decode=True),
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000226 b'This has no Content-Transfer-Encoding: header.\n')
227
228 def test_get_decoded_uu_payload(self):
229 eq = self.assertEqual
230 msg = Message()
231 msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
232 for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
233 msg['content-transfer-encoding'] = cte
234 eq(msg.get_payload(decode=True), b'hello world')
235 # Now try some bogus data
236 msg.set_payload('foo')
237 eq(msg.get_payload(decode=True), b'foo')
238
R David Murraya2860e82011-04-16 09:20:30 -0400239 def test_get_payload_n_raises_on_non_multipart(self):
240 msg = Message()
241 self.assertRaises(TypeError, msg.get_payload, 1)
242
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000243 def test_decoded_generator(self):
244 eq = self.assertEqual
245 msg = self._msgobj('msg_07.txt')
246 with openfile('msg_17.txt') as fp:
247 text = fp.read()
248 s = StringIO()
249 g = DecodedGenerator(s)
250 g.flatten(msg)
251 eq(s.getvalue(), text)
252
253 def test__contains__(self):
254 msg = Message()
255 msg['From'] = 'Me'
256 msg['to'] = 'You'
257 # Check for case insensitivity
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000258 self.assertTrue('from' in msg)
259 self.assertTrue('From' in msg)
260 self.assertTrue('FROM' in msg)
261 self.assertTrue('to' in msg)
262 self.assertTrue('To' in msg)
263 self.assertTrue('TO' in msg)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000264
265 def test_as_string(self):
266 eq = self.ndiffAssertEqual
267 msg = self._msgobj('msg_01.txt')
268 with openfile('msg_01.txt') as fp:
269 text = fp.read()
270 eq(text, str(msg))
271 fullrepr = msg.as_string(unixfrom=True)
272 lines = fullrepr.split('\n')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000273 self.assertTrue(lines[0].startswith('From '))
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000274 eq(text, NL.join(lines[1:]))
275
276 def test_bad_param(self):
277 msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
278 self.assertEqual(msg.get_param('baz'), '')
279
280 def test_missing_filename(self):
281 msg = email.message_from_string("From: foo\n")
282 self.assertEqual(msg.get_filename(), None)
283
284 def test_bogus_filename(self):
285 msg = email.message_from_string(
286 "Content-Disposition: blarg; filename\n")
287 self.assertEqual(msg.get_filename(), '')
288
289 def test_missing_boundary(self):
290 msg = email.message_from_string("From: foo\n")
291 self.assertEqual(msg.get_boundary(), None)
292
293 def test_get_params(self):
294 eq = self.assertEqual
295 msg = email.message_from_string(
296 'X-Header: foo=one; bar=two; baz=three\n')
297 eq(msg.get_params(header='x-header'),
298 [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
299 msg = email.message_from_string(
300 'X-Header: foo; bar=one; baz=two\n')
301 eq(msg.get_params(header='x-header'),
302 [('foo', ''), ('bar', 'one'), ('baz', 'two')])
303 eq(msg.get_params(), None)
304 msg = email.message_from_string(
305 'X-Header: foo; bar="one"; baz=two\n')
306 eq(msg.get_params(header='x-header'),
307 [('foo', ''), ('bar', 'one'), ('baz', 'two')])
308
309 def test_get_param_liberal(self):
310 msg = Message()
311 msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
312 self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')
313
314 def test_get_param(self):
315 eq = self.assertEqual
316 msg = email.message_from_string(
317 "X-Header: foo=one; bar=two; baz=three\n")
318 eq(msg.get_param('bar', header='x-header'), 'two')
319 eq(msg.get_param('quuz', header='x-header'), None)
320 eq(msg.get_param('quuz'), None)
321 msg = email.message_from_string(
322 'X-Header: foo; bar="one"; baz=two\n')
323 eq(msg.get_param('foo', header='x-header'), '')
324 eq(msg.get_param('bar', header='x-header'), 'one')
325 eq(msg.get_param('baz', header='x-header'), 'two')
326 # XXX: We are not RFC-2045 compliant! We cannot parse:
327 # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
328 # msg.get_param("weird")
329 # yet.
330
331 def test_get_param_funky_continuation_lines(self):
332 msg = self._msgobj('msg_22.txt')
333 self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')
334
335 def test_get_param_with_semis_in_quotes(self):
336 msg = email.message_from_string(
337 'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
338 self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
339 self.assertEqual(msg.get_param('name', unquote=False),
340 '"Jim&amp;&amp;Jill"')
341
R. David Murrayd48739f2010-04-14 18:59:18 +0000342 def test_get_param_with_quotes(self):
343 msg = email.message_from_string(
344 'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
345 self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
346 msg = email.message_from_string(
347 "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
348 self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
349
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000350 def test_field_containment(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000351 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000352 msg = email.message_from_string('Header: exists')
353 unless('header' in msg)
354 unless('Header' in msg)
355 unless('HEADER' in msg)
Benjamin Petersonc9c0f202009-06-30 23:06:06 +0000356 self.assertFalse('headerx' in msg)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000357
358 def test_set_param(self):
359 eq = self.assertEqual
360 msg = Message()
361 msg.set_param('charset', 'iso-2022-jp')
362 eq(msg.get_param('charset'), 'iso-2022-jp')
363 msg.set_param('importance', 'high value')
364 eq(msg.get_param('importance'), 'high value')
365 eq(msg.get_param('importance', unquote=False), '"high value"')
366 eq(msg.get_params(), [('text/plain', ''),
367 ('charset', 'iso-2022-jp'),
368 ('importance', 'high value')])
369 eq(msg.get_params(unquote=False), [('text/plain', ''),
370 ('charset', '"iso-2022-jp"'),
371 ('importance', '"high value"')])
372 msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
373 eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')
374
375 def test_del_param(self):
376 eq = self.assertEqual
377 msg = self._msgobj('msg_05.txt')
378 eq(msg.get_params(),
379 [('multipart/report', ''), ('report-type', 'delivery-status'),
380 ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
381 old_val = msg.get_param("report-type")
382 msg.del_param("report-type")
383 eq(msg.get_params(),
384 [('multipart/report', ''),
385 ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
386 msg.set_param("report-type", old_val)
387 eq(msg.get_params(),
388 [('multipart/report', ''),
389 ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
390 ('report-type', old_val)])
391
392 def test_del_param_on_other_header(self):
393 msg = Message()
394 msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
395 msg.del_param('filename', 'content-disposition')
396 self.assertEqual(msg['content-disposition'], 'attachment')
397
R David Murraya2860e82011-04-16 09:20:30 -0400398 def test_del_param_on_nonexistent_header(self):
399 msg = Message()
400 msg.del_param('filename', 'content-disposition')
401
402 def test_del_nonexistent_param(self):
403 msg = Message()
404 msg.add_header('Content-Type', 'text/plain', charset='utf-8')
405 existing_header = msg['Content-Type']
406 msg.del_param('foobar', header='Content-Type')
407 self.assertEqual(msg['Content-Type'], 'text/plain; charset="utf-8"')
408
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000409 def test_set_type(self):
410 eq = self.assertEqual
411 msg = Message()
412 self.assertRaises(ValueError, msg.set_type, 'text')
413 msg.set_type('text/plain')
414 eq(msg['content-type'], 'text/plain')
415 msg.set_param('charset', 'us-ascii')
416 eq(msg['content-type'], 'text/plain; charset="us-ascii"')
417 msg.set_type('text/html')
418 eq(msg['content-type'], 'text/html; charset="us-ascii"')
419
420 def test_set_type_on_other_header(self):
421 msg = Message()
422 msg['X-Content-Type'] = 'text/plain'
423 msg.set_type('application/octet-stream', 'X-Content-Type')
424 self.assertEqual(msg['x-content-type'], 'application/octet-stream')
425
426 def test_get_content_type_missing(self):
427 msg = Message()
428 self.assertEqual(msg.get_content_type(), 'text/plain')
429
430 def test_get_content_type_missing_with_default_type(self):
431 msg = Message()
432 msg.set_default_type('message/rfc822')
433 self.assertEqual(msg.get_content_type(), 'message/rfc822')
434
435 def test_get_content_type_from_message_implicit(self):
436 msg = self._msgobj('msg_30.txt')
437 self.assertEqual(msg.get_payload(0).get_content_type(),
438 'message/rfc822')
439
440 def test_get_content_type_from_message_explicit(self):
441 msg = self._msgobj('msg_28.txt')
442 self.assertEqual(msg.get_payload(0).get_content_type(),
443 'message/rfc822')
444
445 def test_get_content_type_from_message_text_plain_implicit(self):
446 msg = self._msgobj('msg_03.txt')
447 self.assertEqual(msg.get_content_type(), 'text/plain')
448
449 def test_get_content_type_from_message_text_plain_explicit(self):
450 msg = self._msgobj('msg_01.txt')
451 self.assertEqual(msg.get_content_type(), 'text/plain')
452
453 def test_get_content_maintype_missing(self):
454 msg = Message()
455 self.assertEqual(msg.get_content_maintype(), 'text')
456
457 def test_get_content_maintype_missing_with_default_type(self):
458 msg = Message()
459 msg.set_default_type('message/rfc822')
460 self.assertEqual(msg.get_content_maintype(), 'message')
461
462 def test_get_content_maintype_from_message_implicit(self):
463 msg = self._msgobj('msg_30.txt')
464 self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')
465
466 def test_get_content_maintype_from_message_explicit(self):
467 msg = self._msgobj('msg_28.txt')
468 self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')
469
470 def test_get_content_maintype_from_message_text_plain_implicit(self):
471 msg = self._msgobj('msg_03.txt')
472 self.assertEqual(msg.get_content_maintype(), 'text')
473
474 def test_get_content_maintype_from_message_text_plain_explicit(self):
475 msg = self._msgobj('msg_01.txt')
476 self.assertEqual(msg.get_content_maintype(), 'text')
477
478 def test_get_content_subtype_missing(self):
479 msg = Message()
480 self.assertEqual(msg.get_content_subtype(), 'plain')
481
482 def test_get_content_subtype_missing_with_default_type(self):
483 msg = Message()
484 msg.set_default_type('message/rfc822')
485 self.assertEqual(msg.get_content_subtype(), 'rfc822')
486
487 def test_get_content_subtype_from_message_implicit(self):
488 msg = self._msgobj('msg_30.txt')
489 self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')
490
491 def test_get_content_subtype_from_message_explicit(self):
492 msg = self._msgobj('msg_28.txt')
493 self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')
494
495 def test_get_content_subtype_from_message_text_plain_implicit(self):
496 msg = self._msgobj('msg_03.txt')
497 self.assertEqual(msg.get_content_subtype(), 'plain')
498
499 def test_get_content_subtype_from_message_text_plain_explicit(self):
500 msg = self._msgobj('msg_01.txt')
501 self.assertEqual(msg.get_content_subtype(), 'plain')
502
503 def test_get_content_maintype_error(self):
504 msg = Message()
505 msg['Content-Type'] = 'no-slash-in-this-string'
506 self.assertEqual(msg.get_content_maintype(), 'text')
507
508 def test_get_content_subtype_error(self):
509 msg = Message()
510 msg['Content-Type'] = 'no-slash-in-this-string'
511 self.assertEqual(msg.get_content_subtype(), 'plain')
512
513 def test_replace_header(self):
514 eq = self.assertEqual
515 msg = Message()
516 msg.add_header('First', 'One')
517 msg.add_header('Second', 'Two')
518 msg.add_header('Third', 'Three')
519 eq(msg.keys(), ['First', 'Second', 'Third'])
520 eq(msg.values(), ['One', 'Two', 'Three'])
521 msg.replace_header('Second', 'Twenty')
522 eq(msg.keys(), ['First', 'Second', 'Third'])
523 eq(msg.values(), ['One', 'Twenty', 'Three'])
524 msg.add_header('First', 'Eleven')
525 msg.replace_header('First', 'One Hundred')
526 eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
527 eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
528 self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')
529
530 def test_broken_base64_payload(self):
531 x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
532 msg = Message()
533 msg['content-type'] = 'audio/x-midi'
534 msg['content-transfer-encoding'] = 'base64'
535 msg.set_payload(x)
536 self.assertEqual(msg.get_payload(decode=True),
Guido van Rossum9604e662007-08-30 03:46:43 +0000537 bytes(x, 'raw-unicode-escape'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000538
R David Murraya2860e82011-04-16 09:20:30 -0400539 def test_broken_unicode_payload(self):
540 # This test improves coverage but is not a compliance test.
541 # The behavior in this situation is currently undefined by the API.
542 x = 'this is a br\xf6ken thing to do'
543 msg = Message()
544 msg['content-type'] = 'text/plain'
545 msg['content-transfer-encoding'] = '8bit'
546 msg.set_payload(x)
547 self.assertEqual(msg.get_payload(decode=True),
548 bytes(x, 'raw-unicode-escape'))
549
550 def test_questionable_bytes_payload(self):
551 # This test improves coverage but is not a compliance test,
552 # since it involves poking inside the black box.
553 x = 'this is a quéstionable thing to do'.encode('utf-8')
554 msg = Message()
555 msg['content-type'] = 'text/plain; charset="utf-8"'
556 msg['content-transfer-encoding'] = '8bit'
557 msg._payload = x
558 self.assertEqual(msg.get_payload(decode=True), x)
559
R. David Murray7ec754b2010-12-13 23:51:19 +0000560 # Issue 1078919
561 def test_ascii_add_header(self):
562 msg = Message()
563 msg.add_header('Content-Disposition', 'attachment',
564 filename='bud.gif')
565 self.assertEqual('attachment; filename="bud.gif"',
566 msg['Content-Disposition'])
567
568 def test_noascii_add_header(self):
569 msg = Message()
570 msg.add_header('Content-Disposition', 'attachment',
571 filename="Fußballer.ppt")
572 self.assertEqual(
R. David Murraydfd7eb02010-12-24 22:36:49 +0000573 'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
R. David Murray7ec754b2010-12-13 23:51:19 +0000574 msg['Content-Disposition'])
575
576 def test_nonascii_add_header_via_triple(self):
577 msg = Message()
578 msg.add_header('Content-Disposition', 'attachment',
579 filename=('iso-8859-1', '', 'Fußballer.ppt'))
580 self.assertEqual(
R. David Murraydfd7eb02010-12-24 22:36:49 +0000581 'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
582 msg['Content-Disposition'])
583
584 def test_ascii_add_header_with_tspecial(self):
585 msg = Message()
586 msg.add_header('Content-Disposition', 'attachment',
587 filename="windows [filename].ppt")
588 self.assertEqual(
589 'attachment; filename="windows [filename].ppt"',
590 msg['Content-Disposition'])
591
592 def test_nonascii_add_header_with_tspecial(self):
593 msg = Message()
594 msg.add_header('Content-Disposition', 'attachment',
595 filename="Fußballer [filename].ppt")
596 self.assertEqual(
597 "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
R. David Murray7ec754b2010-12-13 23:51:19 +0000598 msg['Content-Disposition'])
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000599
R David Murraya2860e82011-04-16 09:20:30 -0400600 def test_add_header_with_name_only_param(self):
601 msg = Message()
602 msg.add_header('Content-Disposition', 'inline', foo_bar=None)
603 self.assertEqual("inline; foo-bar", msg['Content-Disposition'])
604
605 def test_add_header_with_no_value(self):
606 msg = Message()
607 msg.add_header('X-Status', None)
608 self.assertEqual('', msg['X-Status'])
609
R. David Murray5b2d9dd2011-01-09 02:35:24 +0000610 # Issue 5871: reject an attempt to embed a header inside a header value
611 # (header injection attack).
612 def test_embeded_header_via_Header_rejected(self):
613 msg = Message()
614 msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
615 self.assertRaises(errors.HeaderParseError, msg.as_string)
616
617 def test_embeded_header_via_string_rejected(self):
618 msg = Message()
619 msg['Dummy'] = 'dummy\nX-Injected-Header: test'
620 self.assertRaises(errors.HeaderParseError, msg.as_string)
621
R David Murray7441a7a2012-03-14 02:59:51 -0400622 def test_unicode_header_defaults_to_utf8_encoding(self):
623 # Issue 14291
624 m = MIMEText('abc\n')
625 m['Subject'] = 'É test'
626 self.assertEqual(str(m),textwrap.dedent("""\
627 Content-Type: text/plain; charset="us-ascii"
628 MIME-Version: 1.0
629 Content-Transfer-Encoding: 7bit
630 Subject: =?utf-8?q?=C3=89_test?=
631
632 abc
633 """))
634
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000635# Test the email.encoders module
636class TestEncoders(unittest.TestCase):
R David Murray6d94bd42011-03-16 15:52:22 -0400637
638 def test_EncodersEncode_base64(self):
639 with openfile('PyBanner048.gif', 'rb') as fp:
640 bindata = fp.read()
641 mimed = email.mime.image.MIMEImage(bindata)
642 base64ed = mimed.get_payload()
643 # the transfer-encoded body lines should all be <=76 characters
644 lines = base64ed.split('\n')
645 self.assertLessEqual(max([ len(x) for x in lines ]), 76)
646
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000647 def test_encode_empty_payload(self):
648 eq = self.assertEqual
649 msg = Message()
650 msg.set_charset('us-ascii')
651 eq(msg['content-transfer-encoding'], '7bit')
652
653 def test_default_cte(self):
654 eq = self.assertEqual
Ezio Melottic303c122010-04-22 11:57:12 +0000655 # 7bit data and the default us-ascii _charset
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000656 msg = MIMEText('hello world')
657 eq(msg['content-transfer-encoding'], '7bit')
Ezio Melottic303c122010-04-22 11:57:12 +0000658 # Similar, but with 8bit data
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000659 msg = MIMEText('hello \xf8 world')
660 eq(msg['content-transfer-encoding'], '8bit')
661 # And now with a different charset
662 msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
663 eq(msg['content-transfer-encoding'], 'quoted-printable')
664
R. David Murraye85200d2010-05-06 01:41:14 +0000665 def test_encode7or8bit(self):
666 # Make sure a charset whose input character set is 8bit but
667 # whose output character set is 7bit gets a transfer-encoding
668 # of 7bit.
669 eq = self.assertEqual
R. David Murray850fc852010-06-03 01:58:28 +0000670 msg = MIMEText('文', _charset='euc-jp')
R. David Murraye85200d2010-05-06 01:41:14 +0000671 eq(msg['content-transfer-encoding'], '7bit')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000672
Ezio Melottib3aedd42010-11-20 19:04:17 +0000673
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000674# Test long header wrapping
675class TestLongHeaders(TestEmailBase):
R David Murray01581ee2011-04-18 10:04:34 -0400676
677 maxDiff = None
678
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000679 def test_split_long_continuation(self):
680 eq = self.ndiffAssertEqual
681 msg = email.message_from_string("""\
682Subject: bug demonstration
683\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
684\tmore text
685
686test
687""")
688 sfp = StringIO()
689 g = Generator(sfp)
690 g.flatten(msg)
691 eq(sfp.getvalue(), """\
692Subject: bug demonstration
693\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
694\tmore text
695
696test
697""")
698
699 def test_another_long_almost_unsplittable_header(self):
700 eq = self.ndiffAssertEqual
701 hstr = """\
702bug demonstration
703\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
704\tmore text"""
705 h = Header(hstr, continuation_ws='\t')
706 eq(h.encode(), """\
707bug demonstration
708\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
709\tmore text""")
710 h = Header(hstr.replace('\t', ' '))
711 eq(h.encode(), """\
712bug demonstration
713 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
714 more text""")
715
    def test_long_nonstring(self):
        # Build one Subject header out of three chunks in three different
        # charsets and check its folded, encoded form both when flattened
        # through Generator and when encoded directly with maxlinelen=76.
        eq = self.ndiffAssertEqual
        g = Charset("iso-8859-1")
        cz = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        # The first two chunks are bytes in the charset they are declared
        # with below; the third is a str declared as utf-8.
        g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
                  b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
                  b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
                  b'bef\xf6rdert. ')
        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        h = Header(g_head, g, header_name='Subject')
        h.append(cz_head, cz)
        h.append(utf8_head, utf8)
        msg = Message()
        msg['Subject'] = h
        sfp = StringIO()
        # NOTE: 'g' is rebound here from the iso-8859-1 Charset to the
        # Generator; the Charset is not needed past this point.
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), """\
Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
 =?utf-8?b?44CC?=

""")
        # Encoding the Header object directly, without the "Subject: "
        # prefix, with an explicit maximum line length.
        eq(h.encode(maxlinelen=76), """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000769
770 def test_long_header_encode(self):
771 eq = self.ndiffAssertEqual
772 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
773 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
774 header_name='X-Foobar-Spoink-Defrobnit')
775 eq(h.encode(), '''\
776wasnipoop; giraffes="very-long-necked-animals";
777 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
778
779 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
780 eq = self.ndiffAssertEqual
781 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
782 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
783 header_name='X-Foobar-Spoink-Defrobnit',
784 continuation_ws='\t')
785 eq(h.encode(), '''\
786wasnipoop; giraffes="very-long-necked-animals";
787 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
788
789 def test_long_header_encode_with_tab_continuation(self):
790 eq = self.ndiffAssertEqual
791 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
792 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
793 header_name='X-Foobar-Spoink-Defrobnit',
794 continuation_ws='\t')
795 eq(h.encode(), '''\
796wasnipoop; giraffes="very-long-necked-animals";
797\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
798
R David Murray3a6152f2011-03-14 21:13:03 -0400799 def test_header_encode_with_different_output_charset(self):
800 h = Header('文', 'euc-jp')
801 self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=")
802
803 def test_long_header_encode_with_different_output_charset(self):
804 h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4'
805 b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4'
806 b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4'
807 b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp')
808 res = """\
809=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?=
810 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?="""
811 self.assertEqual(h.encode(), res)
812
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000813 def test_header_splitter(self):
814 eq = self.ndiffAssertEqual
815 msg = MIMEText('')
816 # It'd be great if we could use add_header() here, but that doesn't
817 # guarantee an order of the parameters.
818 msg['X-Foobar-Spoink-Defrobnit'] = (
819 'wasnipoop; giraffes="very-long-necked-animals"; '
820 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
821 sfp = StringIO()
822 g = Generator(sfp)
823 g.flatten(msg)
824 eq(sfp.getvalue(), '''\
825Content-Type: text/plain; charset="us-ascii"
826MIME-Version: 1.0
827Content-Transfer-Encoding: 7bit
828X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
829 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
830
831''')
832
833 def test_no_semis_header_splitter(self):
834 eq = self.ndiffAssertEqual
835 msg = Message()
836 msg['From'] = 'test@dom.ain'
837 msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
838 msg.set_payload('Test')
839 sfp = StringIO()
840 g = Generator(sfp)
841 g.flatten(msg)
842 eq(sfp.getvalue(), """\
843From: test@dom.ain
844References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
845 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>
846
847Test""")
848
R David Murray7da4db12011-04-07 20:37:17 -0400849 def test_last_split_chunk_does_not_fit(self):
850 eq = self.ndiffAssertEqual
851 h = Header('Subject: the first part of this is short, but_the_second'
852 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
853 '_all_by_itself')
854 eq(h.encode(), """\
855Subject: the first part of this is short,
856 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
857
858 def test_splittable_leading_char_followed_by_overlong_unsplitable(self):
859 eq = self.ndiffAssertEqual
860 h = Header(', but_the_second'
861 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
862 '_all_by_itself')
863 eq(h.encode(), """\
864,
865 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
866
867 def test_multiple_splittable_leading_char_followed_by_overlong_unsplitable(self):
868 eq = self.ndiffAssertEqual
869 h = Header(', , but_the_second'
870 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
871 '_all_by_itself')
872 eq(h.encode(), """\
873, ,
874 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
875
876 def test_trailing_splitable_on_overlong_unsplitable(self):
877 eq = self.ndiffAssertEqual
878 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
879 'be_on_a_line_all_by_itself;')
880 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_should_"
881 "be_on_a_line_all_by_itself;")
882
883 def test_trailing_splitable_on_overlong_unsplitable_with_leading_splitable(self):
884 eq = self.ndiffAssertEqual
885 h = Header('; '
886 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
R David Murray01581ee2011-04-18 10:04:34 -0400887 'be_on_a_line_all_by_itself; ')
R David Murray7da4db12011-04-07 20:37:17 -0400888 eq(h.encode(), """\
889;
R David Murray01581ee2011-04-18 10:04:34 -0400890 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
R David Murray7da4db12011-04-07 20:37:17 -0400891
    def test_long_header_with_multiple_sequential_split_chars(self):
        # Encode a long plain-text header and check where it is folded; per
        # the header text itself, this "used to cause truncation of the
        # header when folded".
        # NOTE(review): the test name and the text ("two whitespaces in a
        # row") imply consecutive whitespace, but the literals below contain
        # only single spaces -- the runs look collapsed; confirm against the
        # upstream file.
        eq = self.ndiffAssertEqual
        h = Header('This is a long line that has two whitespaces in a row. '
            'This used to cause truncation of the header when folded')
        eq(h.encode(), """\
This is a long line that has two whitespaces in a row. This used to cause
 truncation of the header when folded""")
899
R David Murray01581ee2011-04-18 10:04:34 -0400900 def test_splitter_split_on_punctuation_only_if_fws(self):
901 eq = self.ndiffAssertEqual
902 h = Header('thisverylongheaderhas;semicolons;and,commas,but'
903 'they;arenotlegal;fold,points')
904 eq(h.encode(), "thisverylongheaderhas;semicolons;and,commas,butthey;"
905 "arenotlegal;fold,points")
906
907 def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self):
908 eq = self.ndiffAssertEqual
909 h = Header('this is a test where we need to have more than one line '
910 'before; our final line that is just too big to fit;; '
911 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
912 'be_on_a_line_all_by_itself;')
913 eq(h.encode(), """\
914this is a test where we need to have more than one line before;
915 our final line that is just too big to fit;;
916 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""")
917
918 def test_overlong_last_part_followed_by_split_point(self):
919 eq = self.ndiffAssertEqual
920 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
921 'be_on_a_line_all_by_itself ')
922 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_"
923 "should_be_on_a_line_all_by_itself ")
924
925 def test_multiline_with_overlong_parts_separated_by_two_split_points(self):
926 eq = self.ndiffAssertEqual
927 h = Header('this_is_a__test_where_we_need_to_have_more_than_one_line_'
928 'before_our_final_line_; ; '
929 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
930 'be_on_a_line_all_by_itself; ')
931 eq(h.encode(), """\
932this_is_a__test_where_we_need_to_have_more_than_one_line_before_our_final_line_;
933 ;
934 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
935
936 def test_multiline_with_overlong_last_part_followed_by_split_point(self):
937 eq = self.ndiffAssertEqual
938 h = Header('this is a test where we need to have more than one line '
939 'before our final line; ; '
940 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
941 'be_on_a_line_all_by_itself; ')
942 eq(h.encode(), """\
943this is a test where we need to have more than one line before our final line;
944 ;
945 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
946
    def test_long_header_with_whitespace_runs(self):
        # Flatten a message whose References value is built from ten
        # '<foo@dom.ain> ' items joined with SPACE, and compare the folded
        # output.
        # NOTE(review): the expected text ends in \x20\x20 while each item
        # here carries a single trailing space, so the whitespace runs in
        # these literals look collapsed -- confirm against the upstream file
        # before relying on this test.
        eq = self.ndiffAssertEqual
        msg = Message()
        msg['From'] = 'test@dom.ain'
        msg['References'] = SPACE.join(['<foo@dom.ain> '] * 10)
        msg.set_payload('Test')
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), """\
From: test@dom.ain
References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
 <foo@dom.ain> <foo@dom.ain>\x20\x20

Test""")
963
964 def test_long_run_with_semi_header_splitter(self):
965 eq = self.ndiffAssertEqual
966 msg = Message()
967 msg['From'] = 'test@dom.ain'
968 msg['References'] = SPACE.join(['<foo@dom.ain>'] * 10) + '; abc'
969 msg.set_payload('Test')
970 sfp = StringIO()
971 g = Generator(sfp)
972 g.flatten(msg)
973 eq(sfp.getvalue(), """\
974From: test@dom.ain
975References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
976 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
977 <foo@dom.ain>; abc
978
979Test""")
980
981 def test_splitter_split_on_punctuation_only_if_fws(self):
982 eq = self.ndiffAssertEqual
983 msg = Message()
984 msg['From'] = 'test@dom.ain'
985 msg['References'] = ('thisverylongheaderhas;semicolons;and,commas,but'
986 'they;arenotlegal;fold,points')
987 msg.set_payload('Test')
988 sfp = StringIO()
989 g = Generator(sfp)
990 g.flatten(msg)
991 # XXX the space after the header should not be there.
992 eq(sfp.getvalue(), """\
993From: test@dom.ain
994References:\x20
995 thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points
996
997Test""")
998
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000999 def test_no_split_long_header(self):
1000 eq = self.ndiffAssertEqual
1001 hstr = 'References: ' + 'x' * 80
Guido van Rossum9604e662007-08-30 03:46:43 +00001002 h = Header(hstr)
1003 # These come on two lines because Headers are really field value
1004 # classes and don't really know about their field names.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001005 eq(h.encode(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +00001006References:
1007 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
1008 h = Header('x' * 80)
1009 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001010
1011 def test_splitting_multiple_long_lines(self):
1012 eq = self.ndiffAssertEqual
1013 hstr = """\
1014from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1015\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1016\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1017"""
1018 h = Header(hstr, continuation_ws='\t')
1019 eq(h.encode(), """\
1020from babylon.socal-raves.org (localhost [127.0.0.1]);
1021 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1022 for <mailman-admin@babylon.socal-raves.org>;
1023 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1024\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1025 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1026 for <mailman-admin@babylon.socal-raves.org>;
1027 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1028\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1029 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1030 for <mailman-admin@babylon.socal-raves.org>;
1031 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")
1032
1033 def test_splitting_first_line_only_is_long(self):
1034 eq = self.ndiffAssertEqual
1035 hstr = """\
1036from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
1037\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1038\tid 17k4h5-00034i-00
1039\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
1040 h = Header(hstr, maxlinelen=78, header_name='Received',
1041 continuation_ws='\t')
1042 eq(h.encode(), """\
1043from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
1044 helo=cthulhu.gerg.ca)
1045\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1046\tid 17k4h5-00034i-00
1047\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")
1048
1049 def test_long_8bit_header(self):
1050 eq = self.ndiffAssertEqual
1051 msg = Message()
1052 h = Header('Britische Regierung gibt', 'iso-8859-1',
1053 header_name='Subject')
1054 h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
Guido van Rossum9604e662007-08-30 03:46:43 +00001055 eq(h.encode(maxlinelen=76), """\
1056=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1057 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001058 msg['Subject'] = h
Guido van Rossum9604e662007-08-30 03:46:43 +00001059 eq(msg.as_string(maxheaderlen=76), """\
1060Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1061 =?iso-8859-1?q?hore-Windkraftprojekte?=
1062
1063""")
1064 eq(msg.as_string(maxheaderlen=0), """\
1065Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001066
1067""")
1068
1069 def test_long_8bit_header_no_charset(self):
1070 eq = self.ndiffAssertEqual
1071 msg = Message()
Barry Warsaw8c571042007-08-30 19:17:18 +00001072 header_string = ('Britische Regierung gibt gr\xfcnes Licht '
1073 'f\xfcr Offshore-Windkraftprojekte '
1074 '<a-very-long-address@example.com>')
1075 msg['Reply-To'] = header_string
R David Murray7441a7a2012-03-14 02:59:51 -04001076 eq(msg.as_string(maxheaderlen=78), """\
1077Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1078 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
1079
1080""")
Barry Warsaw8c571042007-08-30 19:17:18 +00001081 msg = Message()
R David Murray7441a7a2012-03-14 02:59:51 -04001082 msg['Reply-To'] = Header(header_string,
Barry Warsaw8c571042007-08-30 19:17:18 +00001083 header_name='Reply-To')
1084 eq(msg.as_string(maxheaderlen=78), """\
1085Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1086 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001087
1088""")
1089
1090 def test_long_to_header(self):
1091 eq = self.ndiffAssertEqual
1092 to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
R David Murray01581ee2011-04-18 10:04:34 -04001093 '<someone@eecs.umich.edu>, '
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001094 '"Someone Test #B" <someone@umich.edu>, '
1095 '"Someone Test #C" <someone@eecs.umich.edu>, '
1096 '"Someone Test #D" <someone@eecs.umich.edu>')
1097 msg = Message()
1098 msg['To'] = to
1099 eq(msg.as_string(maxheaderlen=78), '''\
Guido van Rossum9604e662007-08-30 03:46:43 +00001100To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
Barry Warsaw70d61ce2009-03-30 23:12:30 +00001101 "Someone Test #B" <someone@umich.edu>,
Guido van Rossum9604e662007-08-30 03:46:43 +00001102 "Someone Test #C" <someone@eecs.umich.edu>,
1103 "Someone Test #D" <someone@eecs.umich.edu>
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001104
1105''')
1106
1107 def test_long_line_after_append(self):
1108 eq = self.ndiffAssertEqual
1109 s = 'This is an example of string which has almost the limit of header length.'
1110 h = Header(s)
1111 h.append('Add another line.')
Guido van Rossum9604e662007-08-30 03:46:43 +00001112 eq(h.encode(maxlinelen=76), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001113This is an example of string which has almost the limit of header length.
1114 Add another line.""")
1115
1116 def test_shorter_line_with_append(self):
1117 eq = self.ndiffAssertEqual
1118 s = 'This is a shorter line.'
1119 h = Header(s)
1120 h.append('Add another sentence. (Surprise?)')
1121 eq(h.encode(),
1122 'This is a shorter line. Add another sentence. (Surprise?)')
1123
1124 def test_long_field_name(self):
1125 eq = self.ndiffAssertEqual
1126 fn = 'X-Very-Very-Very-Long-Header-Name'
Guido van Rossum9604e662007-08-30 03:46:43 +00001127 gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
1128 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
1129 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
1130 'bef\xf6rdert. ')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001131 h = Header(gs, 'iso-8859-1', header_name=fn)
1132 # BAW: this seems broken because the first line is too long
Guido van Rossum9604e662007-08-30 03:46:43 +00001133 eq(h.encode(maxlinelen=76), """\
1134=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
1135 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
1136 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
1137 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001138
1139 def test_long_received_header(self):
1140 h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
1141 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
1142 'Wed, 05 Mar 2003 18:10:18 -0700')
1143 msg = Message()
1144 msg['Received-1'] = Header(h, continuation_ws='\t')
1145 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +00001146 # This should be splitting on spaces not semicolons.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001147 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001148Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1149 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
Barry Warsawbef9d212007-08-31 10:55:37 +00001150 Wed, 05 Mar 2003 18:10:18 -0700
R David Murray01581ee2011-04-18 10:04:34 -04001151Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1152 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
Barry Warsawbef9d212007-08-31 10:55:37 +00001153 Wed, 05 Mar 2003 18:10:18 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001154
1155""")
1156
1157 def test_string_headerinst_eq(self):
1158 h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
1159 'tu-muenchen.de> (David Bremner\'s message of '
1160 '"Thu, 6 Mar 2003 13:58:21 +0100")')
1161 msg = Message()
1162 msg['Received-1'] = Header(h, header_name='Received-1',
1163 continuation_ws='\t')
1164 msg['Received-2'] = h
R David Murray01581ee2011-04-18 10:04:34 -04001165 # XXX The space after the ':' should not be there.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001166 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001167Received-1:\x20
1168 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1169 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
1170Received-2:\x20
1171 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1172 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001173
1174""")
1175
1176 def test_long_unbreakable_lines_with_continuation(self):
1177 eq = self.ndiffAssertEqual
1178 msg = Message()
1179 t = """\
1180iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1181 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
1182 msg['Face-1'] = t
1183 msg['Face-2'] = Header(t, header_name='Face-2')
R David Murray01581ee2011-04-18 10:04:34 -04001184 msg['Face-3'] = ' ' + t
Barry Warsawbef9d212007-08-31 10:55:37 +00001185 # XXX This splitting is all wrong. It the first value line should be
R David Murray01581ee2011-04-18 10:04:34 -04001186 # snug against the field name or the space after the header not there.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001187 eq(msg.as_string(maxheaderlen=78), """\
Barry Warsawc5a6a302007-08-31 11:19:21 +00001188Face-1:\x20
Barry Warsaw70d61ce2009-03-30 23:12:30 +00001189 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001190 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Barry Warsawc5a6a302007-08-31 11:19:21 +00001191Face-2:\x20
Barry Warsawbef9d212007-08-31 10:55:37 +00001192 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001193 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
R David Murray01581ee2011-04-18 10:04:34 -04001194Face-3:\x20
1195 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1196 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001197
1198""")
1199
1200 def test_another_long_multiline_header(self):
1201 eq = self.ndiffAssertEqual
1202 m = ('Received: from siimage.com '
1203 '([172.25.1.3]) by zima.siliconimage.com with '
Guido van Rossum9604e662007-08-30 03:46:43 +00001204 'Microsoft SMTPSVC(5.0.2195.4905); '
1205 'Wed, 16 Oct 2002 07:41:11 -0700')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001206 msg = email.message_from_string(m)
1207 eq(msg.as_string(maxheaderlen=78), '''\
R David Murray01581ee2011-04-18 10:04:34 -04001208Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with
1209 Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 2002 07:41:11 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001210
1211''')
1212
1213 def test_long_lines_with_different_header(self):
1214 eq = self.ndiffAssertEqual
1215 h = ('List-Unsubscribe: '
1216 '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
1217 ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
1218 '?subject=unsubscribe>')
1219 msg = Message()
1220 msg['List'] = h
1221 msg['List'] = Header(h, header_name='List')
1222 eq(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001223List: List-Unsubscribe:
1224 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +00001225 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
R David Murray01581ee2011-04-18 10:04:34 -04001226List: List-Unsubscribe:
1227 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +00001228 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001229
1230""")
1231
    def test_long_rfc2047_header_with_embedded_fws(self):
        # Encode a utf-8 Header whose value spans three lines (the second
        # starting with a tab) and compare against four encoded words.
        # NOTE(review): both literals go through textwrap.dedent(), which
        # strips the white space common to all lines.  Every line below has
        # the same single leading space, so the continuation lines of the
        # expected value lose theirs -- the relative indentation looks
        # flattened; confirm against the upstream file.
        h = Header(textwrap.dedent("""\
 We're going to pretend this header is in a non-ascii character set
 \tto see if line wrapping with encoded words and embedded
 folding white space works"""),
                   charset='utf-8',
                   header_name='Test')
        self.assertEqual(h.encode()+'\n', textwrap.dedent("""\
 =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
 =?utf-8?q?cter_set?=
 =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
 =?utf-8?q?_folding_white_space_works?=""")+'\n')
1244
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001245
Ezio Melottib3aedd42010-11-20 19:04:17 +00001246
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001247# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    def setUp(self):
        # A message whose body begins with a "From " line.
        message = Message()
        message['From'] = 'aaa@bbb.org'
        message.set_payload(
            'From the desk of A.A.A.:\n'
            'Blah blah blah\n')
        self.msg = message

    def _flatten(self, mangle_from_):
        # Serialize self.msg with the given mangle_from_ setting.
        out = StringIO()
        Generator(out, mangle_from_=mangle_from_).flatten(self.msg)
        return out.getvalue()

    def test_mangled_from(self):
        # With mangling on, the body's "From " line gains a '>' prefix.
        expected = (
            'From: aaa@bbb.org\n'
            '\n'
            '>From the desk of A.A.A.:\n'
            'Blah blah blah\n')
        self.assertEqual(self._flatten(True), expected)

    def test_dont_mangle_from(self):
        # With mangling off, the body is written unchanged.
        expected = (
            'From: aaa@bbb.org\n'
            '\n'
            'From the desk of A.A.A.:\n'
            'Blah blah blah\n')
        self.assertEqual(self._flatten(False), expected)
1278
1279
Ezio Melottib3aedd42010-11-20 19:04:17 +00001280
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001281# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    def setUp(self):
        # Make sure we pick up the audiotest.au that lives in email/test/data.
        # In Python, there's an audiotest.au living in Lib/test but that isn't
        # included in some binary distros that don't include the test
        # package.  The trailing empty string on the .join() is significant
        # since findfile() will do a dirname().
        datadir = os.path.join(os.path.dirname(landmark), 'data', '')
        with open(findfile('audiotest.au', datadir), 'rb') as fp:
            self._audiodata = fp.read()
        self._au = MIMEAudio(self._audiodata)

    def test_guess_minor_type(self):
        # No subtype is passed in setUp, so it is derived from the data.
        self.assertEqual(self._au.get_content_type(), 'audio/basic')

    def test_encoding(self):
        # Decoding the payload as base64 must give back the original bytes.
        payload = self._au.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._audiodata)

    def test_checkSetMinor(self):
        # An explicit subtype is used as given.
        au = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(au.get_content_type(), 'audio/fish')

    def test_add_header(self):
        eq = self.assertEqual
        self._au.add_header('Content-Disposition', 'attachment',
                            filename='audiotest.au')
        eq(self._au['content-disposition'],
           'attachment; filename="audiotest.au"')
        eq(self._au.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'audiotest.au')])
        eq(self._au.get_param('filename', header='content-disposition'),
           'audiotest.au')
        # A fresh list serves as a sentinel that can only be matched by
        # identity; assertIs reports both objects when the check fails.
        missing = []
        eq(self._au.get_param('attachment', header='content-disposition'), '')
        self.assertIs(self._au.get_param('foo', failobj=missing,
                                         header='content-disposition'),
                      missing)
        # Try some missing stuff
        self.assertIs(self._au.get_param('foobar', missing), missing)
        self.assertIs(self._au.get_param('attachment', missing,
                                         header='foobar'),
                      missing)
1325
1326
Ezio Melottib3aedd42010-11-20 19:04:17 +00001327
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001328# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    def setUp(self):
        # Load the GIF test image and wrap it without naming a subtype.
        with openfile('PyBanner048.gif', 'rb') as fp:
            self._imgdata = fp.read()
        self._im = MIMEImage(self._imgdata)

    def test_guess_minor_type(self):
        # No subtype is passed in setUp, so it is derived from the data.
        self.assertEqual(self._im.get_content_type(), 'image/gif')

    def test_encoding(self):
        # Decoding the payload as base64 must give back the original bytes.
        payload = self._im.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._imgdata)

    def test_checkSetMinor(self):
        # An explicit subtype is used as given.
        im = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(im.get_content_type(), 'image/fish')

    def test_add_header(self):
        eq = self.assertEqual
        self._im.add_header('Content-Disposition', 'attachment',
                            filename='dingusfish.gif')
        eq(self._im['content-disposition'],
           'attachment; filename="dingusfish.gif"')
        eq(self._im.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'dingusfish.gif')])
        eq(self._im.get_param('filename', header='content-disposition'),
           'dingusfish.gif')
        # A fresh list serves as a sentinel that can only be matched by
        # identity; assertIs reports both objects when the check fails.
        missing = []
        eq(self._im.get_param('attachment', header='content-disposition'), '')
        self.assertIs(self._im.get_param('foo', failobj=missing,
                                         header='content-disposition'),
                      missing)
        # Try some missing stuff
        self.assertIs(self._im.get_param('foobar', missing), missing)
        self.assertIs(self._im.get_param('attachment', missing,
                                         header='foobar'),
                      missing)
1366
1367
Ezio Melottib3aedd42010-11-20 19:04:17 +00001368
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001369# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    def test_headers(self):
        # Raw bytes get the generic binary type and base64 transfer encoding.
        part = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        self.assertEqual(part.get_content_type(), 'application/octet-stream')
        self.assertEqual(part['content-transfer-encoding'], 'base64')

    def test_body(self):
        bytesdata = b'\xfa\xfb\xfc\xfd\xfe\xff'
        part = MIMEApplication(bytesdata)
        # whitespace in the cte encoded block is RFC-irrelevant.
        encoded = part.get_payload().strip()
        self.assertEqual(encoded, '+vv8/f7/')
        # Decoding must give back the original bytes.
        self.assertEqual(part.get_payload(decode=True), bytesdata)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001384
1385
Ezio Melottib3aedd42010-11-20 19:04:17 +00001386
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001387# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    def setUp(self):
        self._msg = MIMEText('hello there')

    def test_types(self):
        eq = self.assertEqual
        eq(self._msg.get_content_type(), 'text/plain')
        eq(self._msg.get_param('charset'), 'us-ascii')
        # A fresh list serves as a sentinel that can only be matched by
        # identity; assertIs reports both objects when the check fails.
        missing = []
        self.assertIs(self._msg.get_param('foobar', missing), missing)
        self.assertIs(self._msg.get_param('charset', missing,
                                          header='foobar'),
                      missing)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        # Same checks as test_charset; kept as a separately named test.
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there')
        # The charset object is compared directly against the name string.
        eq(msg.get_charset(), 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        self.assertIn('hello there', msg.as_string())

    def test_utf8_input(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        eq = self.assertEqual
        msg = MIMEText(teststr, _charset='utf-8')
        eq(msg.get_charset().output_charset, 'utf-8')
        eq(msg['content-type'], 'text/plain; charset="utf-8"')
        # Decoding the payload must give the utf-8 bytes of the input text.
        eq(msg.get_payload(decode=True), teststr.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
                   "will fix in email6")
    def test_utf8_input_no_charset(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
1438
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001439
Ezio Melottib3aedd42010-11-20 19:04:17 +00001440
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001441# Test complicated multipart/* messages
class TestMultipart(TestEmailBase):
    """Build, flatten and parse multipart/* messages."""

    def setUp(self):
        """Assemble a multipart/mixed message holding a text part and a GIF."""
        with openfile('PyBanner048.gif', 'rb') as fp:
            data = fp.read()
        container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
        image = MIMEImage(data, name='dingusfish.gif')
        image.add_header('content-disposition', 'attachment',
                         filename='dingusfish.gif')
        intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
        container.attach(intro)
        container.attach(image)
        container['From'] = 'Barry <barry@digicool.com>'
        container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
        container['Subject'] = 'Here is your dingus fish'

        now = 987809702.54848599
        timetuple = time.localtime(now)
        # The last struct_time field is the DST flag: 0 selects the standard
        # offset, anything else the alternate (DST) offset.
        tzsecs = time.timezone if timetuple[-1] == 0 else time.altzone
        # A positive offset is rendered with '-', a non-positive one with '+'.
        sign = '-' if tzsecs > 0 else '+'
        # Format the magnitude as HHMM.  The previous '%04d' % (tzsecs / 36)
        # leaked a second minus sign for negative offsets and produced wrong
        # digits for offsets that are not a whole number of hours.
        hours, minutes = divmod(abs(tzsecs) // 60, 60)
        tzoffset = ' %s%02d%02d' % (sign, hours, minutes)
        container['Date'] = time.strftime(
            '%a, %d %b %Y %H:%M:%S', timetuple) + tzoffset
        self._msg = container
        self._im = image
        self._txt = intro

    def _make_outer(self):
        """Return an empty multipart/mixed message with the shared headers."""
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.set_boundary('BOUNDARY')
        return outer

    def test_hierarchy(self):
        """The container exposes exactly the two attached parts, by identity."""
        eq = self.assertEqual
        m = self._msg
        self.assertTrue(m.is_multipart())
        eq(m.get_content_type(), 'multipart/mixed')
        eq(len(m.get_payload()), 2)
        self.assertRaises(IndexError, m.get_payload, 2)
        m0 = m.get_payload(0)
        m1 = m.get_payload(1)
        self.assertIs(m0, self._txt)
        self.assertIs(m1, self._im)
        eq(m.get_payload(), [m0, m1])
        self.assertFalse(m0.is_multipart())
        self.assertFalse(m1.is_multipart())

    def test_empty_multipart_idempotent(self):
        """A multipart with one empty part regenerates to the same text."""
        text = """\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY


--BOUNDARY--
"""
        msg = Parser().parsestr(text)
        self.ndiffAssertEqual(text, msg.as_string())

    def test_no_parts_in_a_multipart_with_none_epilogue(self):
        """No parts and default preamble/epilogue: no trailing newline."""
        outer = self._make_outer()
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY

--BOUNDARY--''')

    def test_no_parts_in_a_multipart_with_empty_epilogue(self):
        """No parts with empty-string preamble and epilogue."""
        outer = self._make_outer()
        outer.preamble = ''
        outer.epilogue = ''
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY

--BOUNDARY--
''')

    def test_one_part_in_a_multipart(self):
        """A single text part is framed by the boundary lines."""
        outer = self._make_outer()
        outer.attach(MIMEText('hello world'))
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_preamble(self):
        """An empty-string preamble adds one blank line before the boundary."""
        outer = self._make_outer()
        outer.preamble = ''
        outer.attach(MIMEText('hello world'))
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_preamble(self):
        """A None preamble emits nothing before the first boundary."""
        outer = self._make_outer()
        outer.preamble = None
        outer.attach(MIMEText('hello world'))
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_epilogue(self):
        """A None epilogue emits nothing after the closing boundary."""
        outer = self._make_outer()
        outer.epilogue = None
        outer.attach(MIMEText('hello world'))
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
        """An empty-string epilogue ends the output with a single newline."""
        outer = self._make_outer()
        outer.epilogue = ''
        outer.attach(MIMEText('hello world'))
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
        """A newline epilogue ends the output with a blank line."""
        outer = self._make_outer()
        outer.epilogue = '\n'
        outer.attach(MIMEText('hello world'))
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--

''')

    def test_message_external_body(self):
        """Each message/external-body subpart wraps one text/plain message."""
        eq = self.assertEqual
        msg = self._msgobj('msg_36.txt')
        eq(len(msg.get_payload()), 2)
        msg1 = msg.get_payload(1)
        eq(msg1.get_content_type(), 'multipart/alternative')
        eq(len(msg1.get_payload()), 2)
        for subpart in msg1.get_payload():
            eq(subpart.get_content_type(), 'message/external-body')
            eq(len(subpart.get_payload()), 1)
            subsubpart = subpart.get_payload(0)
            eq(subsubpart.get_content_type(), 'text/plain')

    def test_double_boundary(self):
        """Consecutive boundary lines do not create extra parts."""
        # msg_37.txt is a multipart that contains two dash-boundary's in a
        # row.  Our interpretation of RFC 2046 calls for ignoring the second
        # and subsequent boundaries.
        msg = self._msgobj('msg_37.txt')
        self.assertEqual(len(msg.get_payload()), 3)

    def test_nested_inner_contains_outer_boundary(self):
        """Outer boundaries inside an inner part leave the inner unterminated."""
        # msg_38.txt has an inner part that contains outer boundaries.  My
        # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
        # these are illegal and should be interpreted as unterminated inner
        # parts.
        msg = self._msgobj('msg_38.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): leading whitespace inside this literal was collapsed
        # in the copy under review; the 4-space nesting below is a
        # reconstruction -- confirm against the real _structure() output.
        self.ndiffAssertEqual(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
            text/plain
        text/plain
    text/plain
    text/plain
""")

    def test_nested_with_same_boundary(self):
        """Inner parts that reuse the outer boundary still parse sensibly."""
        # msg 39.txt is similarly evil in that it's got inner parts that use
        # the same boundary as outer parts.  Again, I believe the way this is
        # parsed is closest to the spirit of RFC 2046
        msg = self._msgobj('msg_39.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): leading whitespace inside this literal was collapsed
        # in the copy under review; the 4-space nesting below is a
        # reconstruction -- confirm against the real _structure() output.
        self.ndiffAssertEqual(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
        application/octet-stream
        application/octet-stream
    text/plain
""")

    def test_boundary_in_non_multipart(self):
        """A boundary parameter on a non-multipart type is left alone."""
        msg = self._msgobj('msg_40.txt')
        self.assertEqual(msg.as_string(), '''\
MIME-Version: 1.0
Content-Type: text/html; boundary="--961284236552522269"

----961284236552522269
Content-Type: text/html;
Content-Transfer-Encoding: 7Bit

<html></html>

----961284236552522269--
''')

    def test_boundary_with_leading_space(self):
        """A boundary that starts with whitespace is honoured verbatim."""
        eq = self.assertEqual
        # NOTE(review): whitespace runs are collapsed in the copy under
        # review, so the boundary may have more than one leading space
        # upstream -- confirm.  The three occurrences below agree with the
        # value asserted at the end.
        msg = email.message_from_string('''\
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary=" XXXX"

-- XXXX
Content-Type: text/plain


-- XXXX
Content-Type: text/plain

-- XXXX--
''')
        self.assertTrue(msg.is_multipart())
        eq(msg.get_boundary(), ' XXXX')
        eq(len(msg.get_payload()), 2)

    def test_boundary_without_trailing_newline(self):
        """A closing boundary with no final newline still ends the part."""
        m = Parser().parsestr("""\
Content-Type: multipart/mixed; boundary="===============0012394164=="
MIME-Version: 1.0

--===============0012394164==
Content-Type: image/file1.jpg
MIME-Version: 1.0
Content-Transfer-Encoding: base64

YXNkZg==
--===============0012394164==--""")
        self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001813
1814
Ezio Melottib3aedd42010-11-20 19:04:17 +00001815
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001816# Test some badly formatted messages
class TestNonConformant(TestEmailBase):
    """Parsing and generating messages that break the MIME rules."""

    def test_parse_missing_minor_type(self):
        """A content type without a subtype is reported as text/plain."""
        eq = self.assertEqual
        msg = self._msgobj('msg_14.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')

    def test_same_boundary_inner_outer(self):
        """The first part of msg_15.txt records a missing start boundary."""
        msg = self._msgobj('msg_15.txt')
        # XXX We can probably eventually do better
        inner = msg.get_payload(0)
        self.assertTrue(hasattr(inner, 'defects'))
        self.assertEqual(len(inner.defects), 1)
        self.assertIsInstance(inner.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    def test_multipart_no_boundary(self):
        """A multipart without a boundary keeps a string payload, 2 defects."""
        msg = self._msgobj('msg_25.txt')
        self.assertIsInstance(msg.get_payload(), str)
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    def test_invalid_content_type(self):
        """Invalid Content-Type values read as text/plain but are kept as-is."""
        eq = self.assertEqual
        msg = Message()
        # RFC 2045, $5.2 says invalid yields text/plain
        msg['Content-Type'] = 'text'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Clear the old value and try something /really/ invalid
        del msg['content-type']
        msg['Content-Type'] = 'foo'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Still, make sure that the message is idempotently generated
        s = StringIO()
        Generator(s).flatten(msg)
        self.ndiffAssertEqual(s.getvalue(), 'Content-Type: foo\n\n')

    def test_no_start_boundary(self):
        """The payload of msg_31.txt is the raw text, boundary lines included."""
        msg = self._msgobj('msg_31.txt')
        self.ndiffAssertEqual(msg.get_payload(), """\
--BOUNDARY
Content-Type: text/plain

message 1

--BOUNDARY
Content-Type: text/plain

message 2

--BOUNDARY--
""")

    def test_no_separating_blank_line(self):
        """A missing header/body separator is inserted on output."""
        msg = self._msgobj('msg_35.txt')
        self.ndiffAssertEqual(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: here's something interesting

counter to RFC 2822, there's no separating newline here
""")

    def test_lying_multipart(self):
        """msg_41.txt carries both multipart defects."""
        msg = self._msgobj('msg_41.txt')
        self.assertTrue(hasattr(msg, 'defects'))
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    def test_missing_start_boundary(self):
        """A nested multipart lacking its start boundary records one defect."""
        outer = self._msgobj('msg_42.txt')
        # The message structure is:
        #
        # multipart/mixed
        #    text/plain
        #    message/rfc822
        #        multipart/mixed [*]
        #
        # [*] This message is missing its start boundary
        bad = outer.get_payload(1).get_payload(0)
        self.assertEqual(len(bad.defects), 1)
        self.assertIsInstance(bad.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    def test_first_line_is_continuation_header(self):
        """A leading continuation line is dropped and recorded as a defect."""
        eq = self.assertEqual
        m = ' Line 1\nLine 2\nLine 3'
        msg = email.message_from_string(m)
        eq(msg.keys(), [])
        eq(msg.get_payload(), 'Line 2\nLine 3')
        eq(len(msg.defects), 1)
        self.assertIsInstance(msg.defects[0],
                              errors.FirstHeaderLineIsContinuationDefect)
        eq(msg.defects[0].line, ' Line 1\n')
1927
1928
Ezio Melottib3aedd42010-11-20 19:04:17 +00001929
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001930# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    """Decoding and re-encoding of RFC 2047 encoded words in headers."""

    def test_rfc2047_multiline(self):
        """Encoded words on a folded header decode, join and re-encode."""
        raw = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
        chunks = decode_header(raw)
        self.assertEqual(chunks, [
            (b'Re:', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            (b'baz foo bar', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
        header = make_header(chunks)
        self.assertEqual(
            str(header),
            'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""")

    def test_whitespace_eater_unicode(self):
        """An encoded word followed by plain text decodes into two chunks."""
        raw = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        chunks = decode_header(raw)
        self.assertEqual(chunks, [(b'Andr\xe9', 'iso-8859-1'),
                                  (b'Pirard <pirard@dom.ain>', None)])
        rendered = str(make_header(chunks))
        self.assertEqual(rendered, 'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_eater_unicode_2(self):
        """Alternating plain and base64 words join with single spaces."""
        raw = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?='
        chunks = decode_header(raw)
        self.assertEqual(chunks, [
            (b'The', None), (b'quick brown fox', 'iso-8859-1'),
            (b'jumped over the', None), (b'lazy dog', 'iso-8859-1')])
        rendered = str(make_header(chunks))
        self.assertEqual(rendered,
                         'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        """Encoded words not set off by whitespace are returned undecoded."""
        s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        self.assertEqual(decode_header(s), [(s, None)])

    def test_rfc2047_with_whitespace(self):
        """Whitespace-delimited encoded words are decoded one by one."""
        raw = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        expected = [(b'Sm', None), (b'\xf6', 'iso-8859-1'),
                    (b'rg', None), (b'\xe5', 'iso-8859-1'),
                    (b'sbord', None)]
        self.assertEqual(decode_header(raw), expected)

    def test_rfc2047_B_bad_padding(self):
        """Base64 words with wrong or missing padding still decode."""
        template = '=?iso-8859-1?B?%s?='
        cases = [   # only test complete bytes
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi')
            ]
        for encoded, plain in cases:
            self.assertEqual(decode_header(template % encoded),
                             [(plain, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        """An invalid =XX escape in a Q word is passed through literally."""
        # issue 10004.
        raw = '=?iso-8659-1?Q?andr=e9=zz?='
        expected = [(b'andr\xe9=zz', 'iso-8659-1')]
        self.assertEqual(decode_header(raw), expected)
1994
Ezio Melottib3aedd42010-11-20 19:04:17 +00001995
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001996# Test the MIMEMessage class
class TestMIMEMessage(TestEmailBase):
    """MIMEMessage wrapping, message/* parsing and default content types."""

    def setUp(self):
        """Load the text of msg_11.txt into ``self._text``."""
        with openfile('msg_11.txt') as fp:
            self._text = fp.read()

    def test_type_error(self):
        """MIMEMessage rejects a plain string argument."""
        self.assertRaises(TypeError, MIMEMessage, 'a plain string')

    def test_valid_argument(self):
        """Wrapping a Message yields message/rfc822 holding that very object."""
        eq = self.assertEqual
        subject = 'A sub-message'
        m = Message()
        m['Subject'] = subject
        r = MIMEMessage(m)
        eq(r.get_content_type(), 'message/rfc822')
        payload = r.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subpart = payload[0]
        self.assertIs(subpart, m)
        eq(subpart['subject'], subject)

    def test_bad_multipart(self):
        """Attaching a second message to a MIMEMessage is refused."""
        msg1 = Message()
        msg1['Subject'] = 'subpart 1'
        msg2 = Message()
        msg2['Subject'] = 'subpart 2'
        r = MIMEMessage(msg1)
        self.assertRaises(errors.MultipartConversionError, r.attach, msg2)

    def test_generate(self):
        """Flattening emits the outer headers, then the enclosed message."""
        # First craft the message to be encapsulated
        m = Message()
        m['Subject'] = 'An enclosed message'
        m.set_payload('Here is the body of the message.\n')
        r = MIMEMessage(m)
        r['Subject'] = 'The enclosing message'
        s = StringIO()
        Generator(s).flatten(r)
        self.assertEqual(s.getvalue(), """\
Content-Type: message/rfc822
MIME-Version: 1.0
Subject: The enclosing message

Subject: An enclosed message

Here is the body of the message.
""")

    def test_parse_message_rfc822(self):
        """A parsed message/rfc822 holds a one-element list of Message."""
        eq = self.assertEqual
        msg = self._msgobj('msg_11.txt')
        eq(msg.get_content_type(), 'message/rfc822')
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        submsg = payload[0]
        self.assertIsInstance(submsg, Message)
        eq(submsg['subject'], 'An enclosed message')
        eq(submsg.get_payload(), 'Here is the body of the message.\n')

    def test_dsn(self):
        """Walk the three subparts of a parsed delivery status notification."""
        eq = self.assertEqual
        # msg 16 is a Delivery Status Notification, see RFC 1894
        msg = self._msgobj('msg_16.txt')
        eq(msg.get_content_type(), 'multipart/report')
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 3)
        # Subpart 1 is a text/plain, human readable section
        subpart = msg.get_payload(0)
        eq(subpart.get_content_type(), 'text/plain')
        # NOTE(review): leading whitespace inside this literal was collapsed
        # in the copy under review; the 2-space indent on the detail lines is
        # a reconstruction -- confirm against msg_16.txt.
        eq(subpart.get_payload(), """\
This report relates to a message you sent with the following header fields:

  Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
  Date: Sun, 23 Sep 2001 20:10:55 -0700
  From: "Ian T. Henry" <henryi@oxy.edu>
  To: SoCal Raves <scr@socal-raves.org>
  Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

  Recipient address: jangel1@cougar.noc.ucla.edu
  Reason: recipient reached disk quota

""")
        # Subpart 2 contains the machine parsable DSN information.  It
        # consists of two blocks of headers, represented by two nested Message
        # objects.
        subpart = msg.get_payload(1)
        eq(subpart.get_content_type(), 'message/delivery-status')
        eq(len(subpart.get_payload()), 2)
        # message/delivery-status should treat each block as a bunch of
        # headers, i.e. a bunch of Message objects.
        dsn1 = subpart.get_payload(0)
        self.assertIsInstance(dsn1, Message)
        eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
        eq(dsn1.get_param('dns', header='reporting-mta'), '')
        # Try a missing one <wink>
        eq(dsn1.get_param('nsd', header='reporting-mta'), None)
        dsn2 = subpart.get_payload(1)
        self.assertIsInstance(dsn2, Message)
        eq(dsn2['action'], 'failed')
        eq(dsn2.get_params(header='original-recipient'),
           [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
        eq(dsn2.get_param('rfc822', header='final-recipient'), '')
        # Subpart 3 is the original message
        subpart = msg.get_payload(2)
        eq(subpart.get_content_type(), 'message/rfc822')
        payload = subpart.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subsubpart = payload[0]
        self.assertIsInstance(subsubpart, Message)
        eq(subsubpart.get_content_type(), 'text/plain')
        eq(subsubpart['message-id'],
           '<002001c144a6$8752e060$56104586@oxy.edu>')

    def test_epilogue(self):
        """A hand-built message with preamble/epilogue matches msg_21.txt."""
        with openfile('msg_21.txt') as fp:
            text = fp.read()
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = 'End of MIME message\n'
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        sfp = StringIO()
        Generator(sfp).flatten(msg)
        self.ndiffAssertEqual(sfp.getvalue(), text)

    def test_no_nl_preamble(self):
        """A preamble without a newline is followed by the first boundary."""
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = ''
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        self.ndiffAssertEqual(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
""")

    def test_default_type(self):
        """msg_30.txt parts default to message/rfc822, inner to text/plain."""
        eq = self.assertEqual
        with openfile('msg_30.txt') as fp:
            msg = email.message_from_file(fp)
        container1 = msg.get_payload(0)
        eq(container1.get_default_type(), 'message/rfc822')
        eq(container1.get_content_type(), 'message/rfc822')
        container2 = msg.get_payload(1)
        eq(container2.get_default_type(), 'message/rfc822')
        eq(container2.get_content_type(), 'message/rfc822')
        container1a = container1.get_payload(0)
        eq(container1a.get_default_type(), 'text/plain')
        eq(container1a.get_content_type(), 'text/plain')
        container2a = container2.get_payload(0)
        eq(container2a.get_default_type(), 'text/plain')
        eq(container2a.get_content_type(), 'text/plain')

    def test_default_type_with_explicit_container_type(self):
        """msg_28.txt shows the same default and content types as msg_30.txt."""
        eq = self.assertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        container1 = msg.get_payload(0)
        eq(container1.get_default_type(), 'message/rfc822')
        eq(container1.get_content_type(), 'message/rfc822')
        container2 = msg.get_payload(1)
        eq(container2.get_default_type(), 'message/rfc822')
        eq(container2.get_content_type(), 'message/rfc822')
        container1a = container1.get_payload(0)
        eq(container1a.get_default_type(), 'text/plain')
        eq(container1a.get_content_type(), 'text/plain')
        container2a = container2.get_payload(0)
        eq(container2a.get_default_type(), 'text/plain')
        eq(container2a.get_content_type(), 'text/plain')

    def test_default_type_non_parsed(self):
        """Hand-built digest parts stay message/rfc822 without their headers."""
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # Set up container
        container = MIMEMultipart('digest', 'BOUNDARY')
        container.epilogue = ''
        # Set up subparts
        subpart1a = MIMEText('message 1\n')
        subpart2a = MIMEText('message 2\n')
        subpart1 = MIMEMessage(subpart1a)
        subpart2 = MIMEMessage(subpart2a)
        container.attach(subpart1)
        container.attach(subpart2)
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
        # Strip the explicit headers; the types must not change.
        del subpart1['content-type']
        del subpart1['mime-version']
        del subpart2['content-type']
        del subpart2['mime-version']
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')

    def test_mime_attachments_in_constructor(self):
        """Parts passed as ``_subparts`` become the payload, in order."""
        eq = self.assertEqual
        text1 = MIMEText('')
        text2 = MIMEText('')
        msg = MIMEMultipart(_subparts=(text1, text2))
        eq(len(msg.get_payload()), 2)
        eq(msg.get_payload(0), text1)
        eq(msg.get_payload(1), text2)

    def test_default_multipart_constructor(self):
        """A MIMEMultipart built with no arguments is already multipart."""
        msg = MIMEMultipart()
        self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002295
Ezio Melottib3aedd42010-11-20 19:04:17 +00002296
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002297# A general test of parser->model->generator idempotency. IOW, read a message
2298# in, parse it into a message object tree, then without touching the tree,
2299# regenerate the plain text. The original text and the transformed text
2300# should be identical. Note: that we ignore the Unix-From since that may
2301# contain a changed date.
2302class TestIdempotent(TestEmailBase):
R. David Murray719a4492010-11-21 16:53:48 +00002303
2304 linesep = '\n'
2305
def _msgobj(self, filename):
    """Return ``(message, text)`` for the data file *filename*.

    The raw text is returned next to the parsed message so callers can
    compare regenerated output with it.
    """
    with openfile(filename) as fp:
        raw = fp.read()
    return email.message_from_string(raw), raw
2311
def _idempotent(self, msg, text, unixfrom=False):
    """Flatten *msg* and assert that the output equals *text*.

    The generator is created with ``maxheaderlen=0``; *unixfrom* is passed
    through to ``flatten()``.
    """
    buffer = StringIO()
    Generator(buffer, maxheaderlen=0).flatten(msg, unixfrom=unixfrom)
    self.ndiffAssertEqual(text, buffer.getvalue())
2318
2319 def test_parse_text_message(self):
Ezio Melottib3aedd42010-11-20 19:04:17 +00002320 eq = self.assertEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002321 msg, text = self._msgobj('msg_01.txt')
2322 eq(msg.get_content_type(), 'text/plain')
2323 eq(msg.get_content_maintype(), 'text')
2324 eq(msg.get_content_subtype(), 'plain')
2325 eq(msg.get_params()[1], ('charset', 'us-ascii'))
2326 eq(msg.get_param('charset'), 'us-ascii')
2327 eq(msg.preamble, None)
2328 eq(msg.epilogue, None)
2329 self._idempotent(msg, text)
2330
2331 def test_parse_untyped_message(self):
Ezio Melottib3aedd42010-11-20 19:04:17 +00002332 eq = self.assertEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002333 msg, text = self._msgobj('msg_03.txt')
2334 eq(msg.get_content_type(), 'text/plain')
2335 eq(msg.get_params(), None)
2336 eq(msg.get_param('charset'), None)
2337 self._idempotent(msg, text)
2338
2339 def test_simple_multipart(self):
2340 msg, text = self._msgobj('msg_04.txt')
2341 self._idempotent(msg, text)
2342
2343 def test_MIME_digest(self):
2344 msg, text = self._msgobj('msg_02.txt')
2345 self._idempotent(msg, text)
2346
2347 def test_long_header(self):
2348 msg, text = self._msgobj('msg_27.txt')
2349 self._idempotent(msg, text)
2350
2351 def test_MIME_digest_with_part_headers(self):
2352 msg, text = self._msgobj('msg_28.txt')
2353 self._idempotent(msg, text)
2354
2355 def test_mixed_with_image(self):
2356 msg, text = self._msgobj('msg_06.txt')
2357 self._idempotent(msg, text)
2358
2359 def test_multipart_report(self):
2360 msg, text = self._msgobj('msg_05.txt')
2361 self._idempotent(msg, text)
2362
2363 def test_dsn(self):
2364 msg, text = self._msgobj('msg_16.txt')
2365 self._idempotent(msg, text)
2366
def test_preamble_epilogue(self):
    """Parse msg_21.txt and verify it flattens back to the same text."""
    self._idempotent(*self._msgobj('msg_21.txt'))
2370
def test_multipart_one_part(self):
    """Parse msg_23.txt and verify it flattens back to the same text."""
    self._idempotent(*self._msgobj('msg_23.txt'))
2374
def test_multipart_no_parts(self):
    """Parse msg_24.txt and verify it flattens back to the same text."""
    self._idempotent(*self._msgobj('msg_24.txt'))
2378
def test_no_start_boundary(self):
    """Parse msg_31.txt and verify it flattens back to the same text."""
    self._idempotent(*self._msgobj('msg_31.txt'))
2382
def test_rfc2231_charset(self):
    """Parse msg_32.txt and verify it flattens back to the same text."""
    self._idempotent(*self._msgobj('msg_32.txt'))
2386
def test_more_rfc2231_parameters(self):
    """Parse msg_33.txt and verify it flattens back to the same text."""
    self._idempotent(*self._msgobj('msg_33.txt'))
2390
def test_text_plain_in_a_multipart_digest(self):
    """Parse msg_34.txt and verify it flattens back to the same text."""
    self._idempotent(*self._msgobj('msg_34.txt'))
2394
def test_nested_multipart_mixeds(self):
    """Parse msg_12a.txt and verify it flattens back to the same text."""
    self._idempotent(*self._msgobj('msg_12a.txt'))
2398
def test_message_external_body_idempotent(self):
    """Parse msg_36.txt and verify it flattens back to the same text."""
    self._idempotent(*self._msgobj('msg_36.txt'))
2402
def test_message_delivery_status(self):
    """Round-trip msg_43.txt, flattening with unixfrom=True."""
    self._idempotent(*self._msgobj('msg_43.txt'), unixfrom=True)
2406
def test_message_signed_idempotent(self):
    """Parse msg_45.txt and verify it flattens back to the same text."""
    self._idempotent(*self._msgobj('msg_45.txt'))
2410
def test_content_type(self):
    """Check the parsed structure of the multipart/report in msg_05.txt.

    Verifies the Content-Type parameters, the preamble/epilogue, and the
    type and payload of each of the three subparts.
    """
    eq = self.assertEqual
    # Only the parsed message is inspected here; the raw text is unused.
    msg, _ = self._msgobj('msg_05.txt')
    eq(msg.get_content_type(), 'multipart/report')
    # Test the Content-Type: parameters
    params = dict(msg.get_params())
    eq(params['report-type'], 'delivery-status')
    eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
    eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep)
    eq(msg.epilogue, self.linesep)
    eq(len(msg.get_payload()), 3)
    # Make sure the subparts are what we expect
    msg1 = msg.get_payload(0)
    eq(msg1.get_content_type(), 'text/plain')
    eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep)
    msg2 = msg.get_payload(1)
    eq(msg2.get_content_type(), 'text/plain')
    eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep)
    msg3 = msg.get_payload(2)
    eq(msg3.get_content_type(), 'message/rfc822')
    # assertIsInstance reports the offending object on failure.
    self.assertIsInstance(msg3, Message)
    # The message/rfc822 part wraps exactly one nested Message.
    payload = msg3.get_payload()
    self.assertIsInstance(payload, list)
    eq(len(payload), 1)
    msg4 = payload[0]
    self.assertIsInstance(msg4, Message)
    eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002442
def test_parser(self):
    """Check that msg_06.txt parses as message/rfc822 wrapping one part."""
    eq = self.assertEqual
    # Only the parsed message is inspected here; the raw text is unused.
    msg, _ = self._msgobj('msg_06.txt')
    # Check some of the outer headers
    eq(msg.get_content_type(), 'message/rfc822')
    # Make sure the payload is a list of exactly one sub-Message, and that
    # that submessage has a type of text/plain
    payload = msg.get_payload()
    self.assertIsInstance(payload, list)
    eq(len(payload), 1)
    msg1 = payload[0]
    self.assertIsInstance(msg1, Message)
    eq(msg1.get_content_type(), 'text/plain')
    self.assertIsInstance(msg1.get_payload(), str)
    eq(msg1.get_payload(), self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002459
2460
Ezio Melottib3aedd42010-11-20 19:04:17 +00002461
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002462# Test various other bits of the package's functionality
2463class TestMiscellaneous(TestEmailBase):
2464 def test_message_from_string(self):
2465 with openfile('msg_01.txt') as fp:
2466 text = fp.read()
2467 msg = email.message_from_string(text)
2468 s = StringIO()
2469 # Don't wrap/continue long headers since we're trying to test
2470 # idempotency.
2471 g = Generator(s, maxheaderlen=0)
2472 g.flatten(msg)
2473 self.assertEqual(text, s.getvalue())
2474
2475 def test_message_from_file(self):
2476 with openfile('msg_01.txt') as fp:
2477 text = fp.read()
2478 fp.seek(0)
2479 msg = email.message_from_file(fp)
2480 s = StringIO()
2481 # Don't wrap/continue long headers since we're trying to test
2482 # idempotency.
2483 g = Generator(s, maxheaderlen=0)
2484 g.flatten(msg)
2485 self.assertEqual(text, s.getvalue())
2486
2487 def test_message_from_string_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002488 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002489 with openfile('msg_01.txt') as fp:
2490 text = fp.read()
2491
2492 # Create a subclass
2493 class MyMessage(Message):
2494 pass
2495
2496 msg = email.message_from_string(text, MyMessage)
2497 unless(isinstance(msg, MyMessage))
2498 # Try something more complicated
2499 with openfile('msg_02.txt') as fp:
2500 text = fp.read()
2501 msg = email.message_from_string(text, MyMessage)
2502 for subpart in msg.walk():
2503 unless(isinstance(subpart, MyMessage))
2504
2505 def test_message_from_file_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002506 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002507 # Create a subclass
2508 class MyMessage(Message):
2509 pass
2510
2511 with openfile('msg_01.txt') as fp:
2512 msg = email.message_from_file(fp, MyMessage)
2513 unless(isinstance(msg, MyMessage))
2514 # Try something more complicated
2515 with openfile('msg_02.txt') as fp:
2516 msg = email.message_from_file(fp, MyMessage)
2517 for subpart in msg.walk():
2518 unless(isinstance(subpart, MyMessage))
2519
2520 def test__all__(self):
2521 module = __import__('email')
2522 # Can't use sorted() here due to Python 2.3 compatibility
2523 all = module.__all__[:]
2524 all.sort()
2525 self.assertEqual(all, [
2526 'base64mime', 'charset', 'encoders', 'errors', 'generator',
R. David Murray96fd54e2010-10-08 15:55:28 +00002527 'header', 'iterators', 'message', 'message_from_binary_file',
2528 'message_from_bytes', 'message_from_file',
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002529 'message_from_string', 'mime', 'parser',
2530 'quoprimime', 'utils',
2531 ])
2532
2533 def test_formatdate(self):
2534 now = time.time()
2535 self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
2536 time.gmtime(now)[:6])
2537
2538 def test_formatdate_localtime(self):
2539 now = time.time()
2540 self.assertEqual(
2541 utils.parsedate(utils.formatdate(now, localtime=True))[:6],
2542 time.localtime(now)[:6])
2543
2544 def test_formatdate_usegmt(self):
2545 now = time.time()
2546 self.assertEqual(
2547 utils.formatdate(now, localtime=False),
2548 time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
2549 self.assertEqual(
2550 utils.formatdate(now, localtime=False, usegmt=True),
2551 time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
2552
2553 def test_parsedate_none(self):
2554 self.assertEqual(utils.parsedate(''), None)
2555
2556 def test_parsedate_compact(self):
2557 # The FWS after the comma is optional
2558 self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
2559 utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
2560
2561 def test_parsedate_no_dayofweek(self):
2562 eq = self.assertEqual
2563 eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
2564 (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
2565
2566 def test_parsedate_compact_no_dayofweek(self):
2567 eq = self.assertEqual
2568 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
2569 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2570
R. David Murray4a62e892010-12-23 20:35:46 +00002571 def test_parsedate_no_space_before_positive_offset(self):
2572 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
2573 (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
2574
2575 def test_parsedate_no_space_before_negative_offset(self):
2576 # Issue 1155362: we already handled '+' for this case.
2577 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
2578 (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))
2579
2580
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002581 def test_parsedate_acceptable_to_time_functions(self):
2582 eq = self.assertEqual
2583 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
2584 t = int(time.mktime(timetup))
2585 eq(time.localtime(t)[:6], timetup[:6])
2586 eq(int(time.strftime('%Y', timetup)), 2003)
2587 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
2588 t = int(time.mktime(timetup[:9]))
2589 eq(time.localtime(t)[:6], timetup[:6])
2590 eq(int(time.strftime('%Y', timetup[:9])), 2003)
2591
R. David Murray219d1c82010-08-25 00:45:55 +00002592 def test_parsedate_y2k(self):
2593 """Test for parsing a date with a two-digit year.
2594
2595 Parsing a date with a two-digit year should return the correct
2596 four-digit year. RFC822 allows two-digit years, but RFC2822 (which
2597 obsoletes RFC822) requires four-digit years.
2598
2599 """
2600 self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
2601 utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
2602 self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
2603 utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
2604
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002605 def test_parseaddr_empty(self):
2606 self.assertEqual(utils.parseaddr('<>'), ('', ''))
2607 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
2608
2609 def test_noquote_dump(self):
2610 self.assertEqual(
2611 utils.formataddr(('A Silly Person', 'person@dom.ain')),
2612 'A Silly Person <person@dom.ain>')
2613
2614 def test_escape_dump(self):
2615 self.assertEqual(
2616 utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
2617 r'"A \(Very\) Silly Person" <person@dom.ain>')
2618 a = r'A \(Special\) Person'
2619 b = 'person@dom.ain'
2620 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2621
2622 def test_escape_backslashes(self):
2623 self.assertEqual(
2624 utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
2625 r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
2626 a = r'Arthur \Backslash\ Foobar'
2627 b = 'person@dom.ain'
2628 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2629
2630 def test_name_with_dot(self):
2631 x = 'John X. Doe <jxd@example.com>'
2632 y = '"John X. Doe" <jxd@example.com>'
2633 a, b = ('John X. Doe', 'jxd@example.com')
2634 self.assertEqual(utils.parseaddr(x), (a, b))
2635 self.assertEqual(utils.parseaddr(y), (a, b))
2636 # formataddr() quotes the name if there's a dot in it
2637 self.assertEqual(utils.formataddr((a, b)), y)
2638
R. David Murray5397e862010-10-02 15:58:26 +00002639 def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
2640 # issue 10005. Note that in the third test the second pair of
2641 # backslashes is not actually a quoted pair because it is not inside a
2642 # comment or quoted string: the address being parsed has a quoted
2643 # string containing a quoted backslash, followed by 'example' and two
2644 # backslashes, followed by another quoted string containing a space and
2645 # the word 'example'. parseaddr copies those two backslashes
2646 # literally. Per rfc5322 this is not technically correct since a \ may
2647 # not appear in an address outside of a quoted string. It is probably
2648 # a sensible Postel interpretation, though.
2649 eq = self.assertEqual
2650 eq(utils.parseaddr('""example" example"@example.com'),
2651 ('', '""example" example"@example.com'))
2652 eq(utils.parseaddr('"\\"example\\" example"@example.com'),
2653 ('', '"\\"example\\" example"@example.com'))
2654 eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
2655 ('', '"\\\\"example\\\\" example"@example.com'))
2656
R. David Murray63563cd2010-12-18 18:25:38 +00002657 def test_parseaddr_preserves_spaces_in_local_part(self):
2658 # issue 9286. A normal RFC5322 local part should not contain any
2659 # folding white space, but legacy local parts can (they are a sequence
2660 # of atoms, not dotatoms). On the other hand we strip whitespace from
2661 # before the @ and around dots, on the assumption that the whitespace
2662 # around the punctuation is a mistake in what would otherwise be
2663 # an RFC5322 local part. Leading whitespace is, usual, stripped as well.
2664 self.assertEqual(('', "merwok wok@xample.com"),
2665 utils.parseaddr("merwok wok@xample.com"))
2666 self.assertEqual(('', "merwok wok@xample.com"),
2667 utils.parseaddr("merwok wok@xample.com"))
2668 self.assertEqual(('', "merwok wok@xample.com"),
2669 utils.parseaddr(" merwok wok @xample.com"))
2670 self.assertEqual(('', 'merwok"wok" wok@xample.com'),
2671 utils.parseaddr('merwok"wok" wok@xample.com'))
2672 self.assertEqual(('', 'merwok.wok.wok@xample.com'),
2673 utils.parseaddr('merwok. wok . wok@xample.com'))
2674
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002675 def test_multiline_from_comment(self):
2676 x = """\
2677Foo
2678\tBar <foo@example.com>"""
2679 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
2680
2681 def test_quote_dump(self):
2682 self.assertEqual(
2683 utils.formataddr(('A Silly; Person', 'person@dom.ain')),
2684 r'"A Silly; Person" <person@dom.ain>')
2685
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002686 def test_charset_richcomparisons(self):
2687 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002688 ne = self.assertNotEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002689 cset1 = Charset()
2690 cset2 = Charset()
2691 eq(cset1, 'us-ascii')
2692 eq(cset1, 'US-ASCII')
2693 eq(cset1, 'Us-AsCiI')
2694 eq('us-ascii', cset1)
2695 eq('US-ASCII', cset1)
2696 eq('Us-AsCiI', cset1)
2697 ne(cset1, 'usascii')
2698 ne(cset1, 'USASCII')
2699 ne(cset1, 'UsAsCiI')
2700 ne('usascii', cset1)
2701 ne('USASCII', cset1)
2702 ne('UsAsCiI', cset1)
2703 eq(cset1, cset2)
2704 eq(cset2, cset1)
2705
2706 def test_getaddresses(self):
2707 eq = self.assertEqual
2708 eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
2709 'Bud Person <bperson@dom.ain>']),
2710 [('Al Person', 'aperson@dom.ain'),
2711 ('Bud Person', 'bperson@dom.ain')])
2712
2713 def test_getaddresses_nasty(self):
2714 eq = self.assertEqual
2715 eq(utils.getaddresses(['foo: ;']), [('', '')])
2716 eq(utils.getaddresses(
2717 ['[]*-- =~$']),
2718 [('', ''), ('', ''), ('', '*--')])
2719 eq(utils.getaddresses(
2720 ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
2721 [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
2722
2723 def test_getaddresses_embedded_comment(self):
2724 """Test proper handling of a nested comment"""
2725 eq = self.assertEqual
2726 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
2727 eq(addrs[0][1], 'foo@bar.com')
2728
2729 def test_utils_quote_unquote(self):
2730 eq = self.assertEqual
2731 msg = Message()
2732 msg.add_header('content-disposition', 'attachment',
2733 filename='foo\\wacky"name')
2734 eq(msg.get_filename(), 'foo\\wacky"name')
2735
2736 def test_get_body_encoding_with_bogus_charset(self):
2737 charset = Charset('not a charset')
2738 self.assertEqual(charset.get_body_encoding(), 'base64')
2739
2740 def test_get_body_encoding_with_uppercase_charset(self):
2741 eq = self.assertEqual
2742 msg = Message()
2743 msg['Content-Type'] = 'text/plain; charset=UTF-8'
2744 eq(msg['content-type'], 'text/plain; charset=UTF-8')
2745 charsets = msg.get_charsets()
2746 eq(len(charsets), 1)
2747 eq(charsets[0], 'utf-8')
2748 charset = Charset(charsets[0])
2749 eq(charset.get_body_encoding(), 'base64')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00002750 msg.set_payload(b'hello world', charset=charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002751 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
2752 eq(msg.get_payload(decode=True), b'hello world')
2753 eq(msg['content-transfer-encoding'], 'base64')
2754 # Try another one
2755 msg = Message()
2756 msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
2757 charsets = msg.get_charsets()
2758 eq(len(charsets), 1)
2759 eq(charsets[0], 'us-ascii')
2760 charset = Charset(charsets[0])
2761 eq(charset.get_body_encoding(), encoders.encode_7or8bit)
2762 msg.set_payload('hello world', charset=charset)
2763 eq(msg.get_payload(), 'hello world')
2764 eq(msg['content-transfer-encoding'], '7bit')
2765
2766 def test_charsets_case_insensitive(self):
2767 lc = Charset('us-ascii')
2768 uc = Charset('US-ASCII')
2769 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
2770
2771 def test_partial_falls_inside_message_delivery_status(self):
2772 eq = self.ndiffAssertEqual
2773 # The Parser interface provides chunks of data to FeedParser in 8192
2774 # byte gulps. SF bug #1076485 found one of those chunks inside
2775 # message/delivery-status header block, which triggered an
2776 # unreadline() of NeedMoreData.
2777 msg = self._msgobj('msg_43.txt')
2778 sfp = StringIO()
2779 iterators._structure(msg, sfp)
2780 eq(sfp.getvalue(), """\
2781multipart/report
2782 text/plain
2783 message/delivery-status
2784 text/plain
2785 text/plain
2786 text/plain
2787 text/plain
2788 text/plain
2789 text/plain
2790 text/plain
2791 text/plain
2792 text/plain
2793 text/plain
2794 text/plain
2795 text/plain
2796 text/plain
2797 text/plain
2798 text/plain
2799 text/plain
2800 text/plain
2801 text/plain
2802 text/plain
2803 text/plain
2804 text/plain
2805 text/plain
2806 text/plain
2807 text/plain
2808 text/plain
2809 text/plain
2810 text/rfc822-headers
2811""")
2812
R. David Murraya0b44b52010-12-02 21:47:19 +00002813 def test_make_msgid_domain(self):
2814 self.assertEqual(
2815 email.utils.make_msgid(domain='testdomain-string')[-19:],
2816 '@testdomain-string>')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002817
Ezio Melottib3aedd42010-11-20 19:04:17 +00002818
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002819# Test the iterator/generators
class TestIterators(TestEmailBase):
    """Tests for email.iterators and FeedParser's BufferedSubFile."""

    def test_body_line_iterator(self):
        """body_line_iterator() yields the body lines of every part."""
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # First a simple non-multipart message
        msg = self._msgobj('msg_01.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 6)
        neq(EMPTYSTRING.join(lines), msg.get_payload())
        # Now a more complicated multipart
        msg = self._msgobj('msg_02.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 43)
        with openfile('msg_19.txt') as fp:
            neq(EMPTYSTRING.join(lines), fp.read())

    def test_typed_subpart_iterator(self):
        """typed_subpart_iterator(msg, 'text') yields the two text parts."""
        eq = self.assertEqual
        msg = self._msgobj('msg_04.txt')
        it = iterators.typed_subpart_iterator(msg, 'text')
        # One payload per matching subpart, so len() doubles as the count.
        payloads = [subpart.get_payload() for subpart in it]
        eq(len(payloads), 2)
        eq(EMPTYSTRING.join(payloads), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        """A message with no explicit type still matches text/plain."""
        eq = self.assertEqual
        msg = self._msgobj('msg_03.txt')
        it = iterators.typed_subpart_iterator(msg, 'text', 'plain')
        # One payload per matching subpart, so len() doubles as the count.
        payloads = [subpart.get_payload() for subpart in it]
        eq(len(payloads), 1)
        eq(EMPTYSTRING.join(payloads), """\

Hi,

Do you like this message?

-Me
""")

    def test_pushCR_LF(self):
        """FeedParser BufferedSubFile.push() assumed it received complete
           line endings.  A CR ending one push() followed by a LF starting
           the next push() added an empty line.
        """
        # Each entry is (chunk pushed, number of complete lines that push
        # is expected to make readable).
        imt = [
            ("a\r \n", 2),
            ("b", 0),
            ("c\n", 1),
            ("", 0),
            ("d\r\n", 1),
            ("e\r", 0),
            ("\nf", 1),
            ("\r\n", 1),
            ]
        from email.feedparser import BufferedSubFile, NeedMoreData
        bsf = BufferedSubFile()
        om = []
        nt = 0
        for il, n in imt:
            bsf.push(il)
            nt += n
            n1 = 0
            while True:
                ol = bsf.readline()
                # NeedMoreData is a sentinel object: compare by identity.
                if ol is NeedMoreData:
                    break
                om.append(ol)
                n1 += 1
            # assertEqual reports both values when the counts differ.
            self.assertEqual(n, n1)
        self.assertEqual(len(om), nt)
        # Joined output must reproduce the joined input exactly.
        self.assertEqual(''.join([il for il, n in imt]), ''.join(om))
2906
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002907
Ezio Melottib3aedd42010-11-20 19:04:17 +00002908
class TestParsers(TestEmailBase):
    """Tests for Parser/HeaderParser behaviour on well- and ill-formed input."""

    # Always show full diffs when a long-string assertion fails.
    maxDiff = None

    def test_header_parser(self):
        """HeaderParser reads headers only and leaves the body as a string."""
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt') as fp:
            msg = HeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)

    def test_whitespace_continuation(self):
        eq = self.assertEqual
        # This message contains a line after the Subject: header that has only
        # whitespace, but it is not empty!
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: the next line has a space on it
\x20
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        eq = self.assertEqual
        # Like the previous test, but the subject line is the last
        # header.
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam
Subject: the next line has a space on it
\x20

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        """Parts of a CRLF-delimited message keep their CRLF line endings."""
        eq = self.assertEqual
        with openfile('msg_26.txt', newline='\n') as fp:
            msg = Parser().parse(fp)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'text/plain')
        eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'application/riscos')

    def test_crlf_flatten(self):
        # Using newline='\n' preserves the crlfs in this input file.
        with openfile('msg_26.txt', newline='\n') as fp:
            text = fp.read()
        msg = email.message_from_string(text)
        s = StringIO()
        g = Generator(s)
        g.flatten(msg, linesep='\r\n')
        self.assertEqual(s.getvalue(), text)

    def test_multipart_digest_with_extra_mime_headers(self):
        """Each digest part of msg_28.txt wraps a single text/plain message."""
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'message/rfc822')
        self.assertTrue(part1.is_multipart())
        eq(len(part1.get_payload()), 1)
        part1a = part1.get_payload(0)
        self.assertFalse(part1a.is_multipart())
        eq(part1a.get_content_type(), 'text/plain')
        neq(part1a.get_payload(), 'message 1\n')
        # next message/rfc822
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'message/rfc822')
        self.assertTrue(part2.is_multipart())
        eq(len(part2.get_payload()), 1)
        part2a = part2.get_payload(0)
        self.assertFalse(part2a.is_multipart())
        eq(part2a.get_content_type(), 'text/plain')
        neq(part2a.get_payload(), 'message 2\n')

    def test_three_lines(self):
        # A bug report by Andrew McNamara
        lines = ['From: Andrew Person <aperson@dom.ain',
                 'Subject: Test',
                 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
        msg = email.message_from_string(NL.join(lines))
        self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        eq = self.assertEqual
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        value1 = 'text'
        value2 = 'more text'
        m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            value1, value2)
        msg = email.message_from_string(m)
        eq(msg.get('Header'), value1)
        eq(msg.get('Next-Header'), value2)

    def test_rfc2822_header_syntax(self):
        """Unusual but printable header names are accepted as headers."""
        eq = self.assertEqual
        m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg), 3)
        eq(sorted(field for field in msg), ['!"#QUX;~', '>From', 'From'])
        eq(msg.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        """A first line with a space before any colon yields no headers."""
        eq = self.assertEqual
        m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg.keys()), 0)

    def test_rfc2822_one_character_header(self):
        """Single-character header names are parsed as headers."""
        eq = self.assertEqual
        m = 'A: first header\nB: second header\nCC: third header\n\nbody'
        msg = email.message_from_string(m)
        # sorted() works on any iterable, so this does not depend on
        # keys() returning a mutable list.
        eq(sorted(msg.keys()), ['A', 'B', 'CC'])
        eq(msg.get_payload(), 'body')

    def test_CRLFLF_at_end_of_part(self):
        # issue 5610: feedparser should not eat two chars from body part ending
        # with "\r\n\n".
        m = (
            "From: foo@bar.com\n"
            "To: baz\n"
            "Mime-Version: 1.0\n"
            "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
            "\n"
            "--BOUNDARY\n"
            "Content-Type: text/plain\n"
            "\n"
            "body ending with CRLF newline\r\n"
            "\n"
            "--BOUNDARY--\n"
          )
        msg = email.message_from_string(m)
        self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003071
Ezio Melottib3aedd42010-11-20 19:04:17 +00003072
R. David Murray96fd54e2010-10-08 15:55:28 +00003073class Test8BitBytesHandling(unittest.TestCase):
3074 # In Python3 all input is string, but that doesn't work if the actual input
3075 # uses an 8bit transfer encoding. To hack around that, in email 5.1 we
3076 # decode byte streams using the surrogateescape error handler, and
3077 # reconvert to binary at appropriate places if we detect surrogates. This
3078 # doesn't allow us to transform headers with 8bit bytes (they get munged),
3079 # but it does allow us to parse and preserve them, and to decode body
3080 # parts that use an 8bit CTE.
3081
# Template for a single-part message.  The body tests fill in {charset},
# {cte} and {bodyline} with str.format() and then encode the result to bytes.
bodytest_msg = textwrap.dedent("""\
    From: foo@bar.com
    To: baz
    Mime-Version: 1.0
    Content-Type: text/plain; charset={charset}
    Content-Transfer-Encoding: {cte}

    {bodyline}
    """)
3091
def test_known_8bit_CTE(self):
    """An 8bit utf-8 body is readable both as text and as raw bytes."""
    raw = self.bodytest_msg.format(
        charset='utf-8', cte='8bit', bodyline='pöstal').encode('utf-8')
    parsed = email.message_from_bytes(raw)
    # Undecoded payload comes back as text in the declared charset.
    self.assertEqual(parsed.get_payload(), "pöstal\n")
    # decode=True hands back the original bytes.
    original_bytes = "pöstal\n".encode('utf-8')
    self.assertEqual(parsed.get_payload(decode=True), original_bytes)
3100
def test_unknown_8bit_CTE(self):
    """With an unknown charset, non-ASCII bytes become U+FFFD in text."""
    raw = self.bodytest_msg.format(
        charset='notavalidcharset', cte='8bit',
        bodyline='pöstal').encode('utf-8')
    parsed = email.message_from_bytes(raw)
    # Each of the two bytes of the encoded 'ö' is replaced separately.
    self.assertEqual(parsed.get_payload(), "p\uFFFD\uFFFDstal\n")
    # The raw bytes are still recoverable with decode=True.
    original_bytes = "pöstal\n".encode('utf-8')
    self.assertEqual(parsed.get_payload(decode=True), original_bytes)
3109
def test_8bit_in_quopri_body(self):
    """A raw 8bit byte inside a quoted-printable body is tolerated."""
    # The input is not RFC compliant.  Without 'decode' the library
    # interprets the body with the charset named in the headers, which
    # works here only because the stray byte really is utf-8; on genuinely
    # dirty data that guess may produce mojibake.  That half of the test
    # therefore only pins the current behavior, not necessarily the best
    # one.  With 'decode' the raw bytes come back, so that half should be
    # correct behavior, or at least match what email4 produced.
    raw = self.bodytest_msg.format(
        charset='utf-8', cte='quoted-printable',
        bodyline='p=C3=B6stál').encode('utf-8')
    parsed = email.message_from_bytes(raw)
    self.assertEqual(parsed.get_payload(), 'p=C3=B6stál\n')
    decoded_bytes = 'pöstál\n'.encode('utf-8')
    self.assertEqual(parsed.get_payload(decode=True), decoded_bytes)
3127
def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
    """Bytes undecodable in the declared charset turn into U+FFFD."""
    # Same setup as the quoted-printable test, except the declared charset
    # (ascii) cannot decode the stray 8bit bytes, so each is replaced by
    # the unicode 'unknown' character.  This may or may not be the ideal
    # behavior.  With decode=False no decoder is involved, so this single
    # test covers the behavior.
    raw = self.bodytest_msg.format(
        charset='ascii', cte='quoted-printable',
        bodyline='p=C3=B6stál').encode('utf-8')
    parsed = email.message_from_bytes(raw)
    self.assertEqual(parsed.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
    decoded_bytes = 'pöstál\n'.encode('utf-8')
    self.assertEqual(parsed.get_payload(decode=True), decoded_bytes)
3142
def test_8bit_in_base64_body(self):
    """A base64 body containing an 8bit byte is returned undecoded."""
    # The stray byte makes the block undecodable by normal means, so the
    # payload comes back as the original, still-encoded bytes.
    raw = self.bodytest_msg.format(
        charset='utf-8', cte='base64',
        bodyline='cMO2c3RhbAá=').encode('utf-8')
    parsed = email.message_from_bytes(raw)
    untouched = 'cMO2c3RhbAá=\n'.encode('utf-8')
    self.assertEqual(parsed.get_payload(decode=True), untouched)
3152
def test_8bit_in_uuencode_body(self):
    """A uuencode body containing an 8bit byte is returned undecoded."""
    # The stray byte makes the block undecodable by normal means, so the
    # payload comes back as the original, still-encoded bytes.
    raw = self.bodytest_msg.format(
        charset='utf-8', cte='uuencode',
        bodyline='<,.V<W1A; á ').encode('utf-8')
    parsed = email.message_from_bytes(raw)
    untouched = '<,.V<W1A; á \n'.encode('utf-8')
    self.assertEqual(parsed.get_payload(decode=True), untouched)
3162
3163
# Each entry pairs a raw header line (as it appears in the input message)
# with the (name, value) expected when the message is printed; headers that
# contain non-ASCII text are expected as 'unknown-8bit' encoded words.
headertest_headers = (
    ('From: foo@bar.com', ('From', 'foo@bar.com')),
    ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
    ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
        '\tJean de Baddie',
        ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
            'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
            ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
    ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
    )
# The raw header lines joined into one message and encoded as utf-8 bytes,
# followed by a one-line body.
headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) +
    '\nYes, they are flying.\n').encode('utf-8')
R. David Murray96fd54e2010-10-08 15:55:28 +00003176
3177 def test_get_8bit_header(self):
3178 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003179 self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz')
3180 self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz')
R. David Murray96fd54e2010-10-08 15:55:28 +00003181
3182 def test_print_8bit_headers(self):
3183 msg = email.message_from_bytes(self.headertest_msg)
3184 self.assertEqual(str(msg),
R. David Murray92532142011-01-07 23:25:30 +00003185 textwrap.dedent("""\
3186 From: {}
3187 To: {}
3188 Subject: {}
3189 From: {}
3190
3191 Yes, they are flying.
3192 """).format(*[expected[1] for (_, expected) in
3193 self.headertest_headers]))
R. David Murray96fd54e2010-10-08 15:55:28 +00003194
3195 def test_values_with_8bit_headers(self):
3196 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003197 self.assertListEqual([str(x) for x in msg.values()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003198 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003199 'b\uFFFD\uFFFDz',
3200 'Maintenant je vous pr\uFFFD\uFFFDsente mon '
3201 'coll\uFFFD\uFFFDgue, le pouf '
3202 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
R. David Murray96fd54e2010-10-08 15:55:28 +00003203 '\tJean de Baddie',
R. David Murray92532142011-01-07 23:25:30 +00003204 "g\uFFFD\uFFFDst"])
R. David Murray96fd54e2010-10-08 15:55:28 +00003205
3206 def test_items_with_8bit_headers(self):
3207 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003208 self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003209 [('From', 'foo@bar.com'),
R. David Murray92532142011-01-07 23:25:30 +00003210 ('To', 'b\uFFFD\uFFFDz'),
3211 ('Subject', 'Maintenant je vous '
3212 'pr\uFFFD\uFFFDsente '
3213 'mon coll\uFFFD\uFFFDgue, le pouf '
3214 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
3215 '\tJean de Baddie'),
3216 ('From', 'g\uFFFD\uFFFDst')])
R. David Murray96fd54e2010-10-08 15:55:28 +00003217
3218 def test_get_all_with_8bit_headers(self):
3219 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003220 self.assertListEqual([str(x) for x in msg.get_all('from')],
R. David Murray96fd54e2010-10-08 15:55:28 +00003221 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003222 'g\uFFFD\uFFFDst'])
R. David Murray96fd54e2010-10-08 15:55:28 +00003223
R David Murraya2150232011-03-16 21:11:23 -04003224 def test_get_content_type_with_8bit(self):
3225 msg = email.message_from_bytes(textwrap.dedent("""\
3226 Content-Type: text/pl\xA7in; charset=utf-8
3227 """).encode('latin-1'))
3228 self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin")
3229 self.assertEqual(msg.get_content_maintype(), "text")
3230 self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin")
3231
3232 def test_get_params_with_8bit(self):
3233 msg = email.message_from_bytes(
3234 'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1'))
3235 self.assertEqual(msg.get_params(header='x-header'),
3236 [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])
3237 self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne')
3238 # XXX: someday you might be able to get 'b\xa7r', for now you can't.
3239 self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None)
3240
3241 def test_get_rfc2231_params_with_8bit(self):
3242 msg = email.message_from_bytes(textwrap.dedent("""\
3243 Content-Type: text/plain; charset=us-ascii;
3244 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3245 ).encode('latin-1'))
3246 self.assertEqual(msg.get_param('title'),
3247 ('us-ascii', 'en', 'This is not f\uFFFDn'))
3248
3249 def test_set_rfc2231_params_with_8bit(self):
3250 msg = email.message_from_bytes(textwrap.dedent("""\
3251 Content-Type: text/plain; charset=us-ascii;
3252 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3253 ).encode('latin-1'))
3254 msg.set_param('title', 'test')
3255 self.assertEqual(msg.get_param('title'), 'test')
3256
3257 def test_del_rfc2231_params_with_8bit(self):
3258 msg = email.message_from_bytes(textwrap.dedent("""\
3259 Content-Type: text/plain; charset=us-ascii;
3260 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3261 ).encode('latin-1'))
3262 msg.del_param('title')
3263 self.assertEqual(msg.get_param('title'), None)
3264 self.assertEqual(msg.get_content_maintype(), 'text')
3265
3266 def test_get_payload_with_8bit_cte_header(self):
3267 msg = email.message_from_bytes(textwrap.dedent("""\
3268 Content-Transfer-Encoding: b\xa7se64
3269 Content-Type: text/plain; charset=latin-1
3270
3271 payload
3272 """).encode('latin-1'))
3273 self.assertEqual(msg.get_payload(), 'payload\n')
3274 self.assertEqual(msg.get_payload(decode=True), b'payload\n')
3275
    # UTF-8 encoded fixture with raw 8bit bytes in the To and Subject headers
    # (the Subject is folded with a tab) and a Cyrillic body declared as
    # charset utf-8 with Content-Transfer-Encoding 8bit.
    non_latin_bin_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: báz
        Subject: Maintenant je vous présente mon collègue, le pouf célèbre
        \tJean de Baddie
        Mime-Version: 1.0
        Content-Type: text/plain; charset="utf-8"
        Content-Transfer-Encoding: 8bit

        Да, они летят.
        """).encode('utf-8')
3287
3288 def test_bytes_generator(self):
3289 msg = email.message_from_bytes(self.non_latin_bin_msg)
3290 out = BytesIO()
3291 email.generator.BytesGenerator(out).flatten(msg)
3292 self.assertEqual(out.getvalue(), self.non_latin_bin_msg)
3293
R. David Murray7372a072011-01-26 21:21:32 +00003294 def test_bytes_generator_handles_None_body(self):
3295 #Issue 11019
3296 msg = email.message.Message()
3297 out = BytesIO()
3298 email.generator.BytesGenerator(out).flatten(msg)
3299 self.assertEqual(out.getvalue(), b"\n")
3300
R. David Murray92532142011-01-07 23:25:30 +00003301 non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\
R. David Murray96fd54e2010-10-08 15:55:28 +00003302 From: foo@bar.com
R. David Murray92532142011-01-07 23:25:30 +00003303 To: =?unknown-8bit?q?b=C3=A1z?=
3304 Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=
3305 =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=
3306 =?unknown-8bit?q?_Jean_de_Baddie?=
R. David Murray96fd54e2010-10-08 15:55:28 +00003307 Mime-Version: 1.0
3308 Content-Type: text/plain; charset="utf-8"
3309 Content-Transfer-Encoding: base64
3310
3311 0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
3312 """)
3313
3314 def test_generator_handles_8bit(self):
3315 msg = email.message_from_bytes(self.non_latin_bin_msg)
3316 out = StringIO()
3317 email.generator.Generator(out).flatten(msg)
R. David Murray92532142011-01-07 23:25:30 +00003318 self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped)
R. David Murray96fd54e2010-10-08 15:55:28 +00003319
3320 def test_bytes_generator_with_unix_from(self):
3321 # The unixfrom contains a current date, so we can't check it
3322 # literally. Just make sure the first word is 'From' and the
3323 # rest of the message matches the input.
3324 msg = email.message_from_bytes(self.non_latin_bin_msg)
3325 out = BytesIO()
3326 email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
3327 lines = out.getvalue().split(b'\n')
3328 self.assertEqual(lines[0].split()[0], b'From')
3329 self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)
3330
    # Variant of non_latin_bin_msg_as7bit_wrapped used where str(msg) is
    # compared: lines 2 and 3 of the wrapped text (the Subject line and its
    # first continuation) are replaced by a single unwrapped Subject line
    # holding both encoded words' content; the final continuation line is
    # kept as is.
    non_latin_bin_msg_as7bit = non_latin_bin_msg_as7bit_wrapped.split('\n')
    non_latin_bin_msg_as7bit[2:4] = [
        'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
        'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
    non_latin_bin_msg_as7bit = '\n'.join(non_latin_bin_msg_as7bit)
3336
R. David Murray96fd54e2010-10-08 15:55:28 +00003337 def test_message_from_binary_file(self):
3338 fn = 'test.msg'
3339 self.addCleanup(unlink, fn)
3340 with open(fn, 'wb') as testfile:
3341 testfile.write(self.non_latin_bin_msg)
Brett Cannon384917a2010-10-29 23:08:36 +00003342 with open(fn, 'rb') as testfile:
3343 m = email.parser.BytesParser().parse(testfile)
R. David Murray96fd54e2010-10-08 15:55:28 +00003344 self.assertEqual(str(m), self.non_latin_bin_msg_as7bit)
3345
    # Latin-1 encoded fixture: ASCII-only headers and an 8bit body declared
    # as charset latin-1 with Content-Transfer-Encoding 8bit.
    latin_bin_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: Dinsdale
        Subject: Nudge nudge, wink, wink
        Mime-Version: 1.0
        Content-Type: text/plain; charset="latin-1"
        Content-Transfer-Encoding: 8bit

        oh là là, know what I mean, know what I mean?
        """).encode('latin-1')

    # Expected string form of latin_bin_msg: the charset is spelled
    # iso-8859-1 and the body is re-encoded as quoted-printable.
    latin_bin_msg_as7bit = textwrap.dedent("""\
        From: foo@bar.com
        To: Dinsdale
        Subject: Nudge nudge, wink, wink
        Mime-Version: 1.0
        Content-Type: text/plain; charset="iso-8859-1"
        Content-Transfer-Encoding: quoted-printable

        oh l=E0 l=E0, know what I mean, know what I mean?
        """)
3367
3368 def test_string_generator_reencodes_to_quopri_when_appropriate(self):
3369 m = email.message_from_bytes(self.latin_bin_msg)
3370 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3371
3372 def test_decoded_generator_emits_unicode_body(self):
3373 m = email.message_from_bytes(self.latin_bin_msg)
3374 out = StringIO()
3375 email.generator.DecodedGenerator(out).flatten(m)
3376 #DecodedHeader output contains an extra blank line compared
3377 #to the input message. RDM: not sure if this is a bug or not,
3378 #but it is not specific to the 8bit->7bit conversion.
3379 self.assertEqual(out.getvalue(),
3380 self.latin_bin_msg.decode('latin-1')+'\n')
3381
3382 def test_bytes_feedparser(self):
3383 bfp = email.feedparser.BytesFeedParser()
3384 for i in range(0, len(self.latin_bin_msg), 10):
3385 bfp.feed(self.latin_bin_msg[i:i+10])
3386 m = bfp.close()
3387 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3388
R. David Murray8451c4b2010-10-23 22:19:56 +00003389 def test_crlf_flatten(self):
3390 with openfile('msg_26.txt', 'rb') as fp:
3391 text = fp.read()
3392 msg = email.message_from_bytes(text)
3393 s = BytesIO()
3394 g = email.generator.BytesGenerator(s)
3395 g.flatten(msg, linesep='\r\n')
3396 self.assertEqual(s.getvalue(), text)
R David Murrayc5c14722011-04-06 08:13:02 -04003397
3398 def test_8bit_multipart(self):
3399 # Issue 11605
3400 source = textwrap.dedent("""\
3401 Date: Fri, 18 Mar 2011 17:15:43 +0100
3402 To: foo@example.com
3403 From: foodwatch-Newsletter <bar@example.com>
3404 Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System
3405 Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain>
3406 MIME-Version: 1.0
3407 Content-Type: multipart/alternative;
3408 boundary="b1_76a486bee62b0d200f33dc2ca08220ad"
3409
3410 --b1_76a486bee62b0d200f33dc2ca08220ad
3411 Content-Type: text/plain; charset="utf-8"
3412 Content-Transfer-Encoding: 8bit
3413
3414 Guten Tag, ,
3415
3416 mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die
3417 Nachrichten aus Japan.
3418
3419
3420 --b1_76a486bee62b0d200f33dc2ca08220ad
3421 Content-Type: text/html; charset="utf-8"
3422 Content-Transfer-Encoding: 8bit
3423
3424 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
3425 "http://www.w3.org/TR/html4/loose.dtd">
3426 <html lang="de">
3427 <head>
3428 <title>foodwatch - Newsletter</title>
3429 </head>
3430 <body>
3431 <p>mit gro&szlig;er Betroffenheit verfolgen auch wir im foodwatch-Team
3432 die Nachrichten aus Japan.</p>
3433 </body>
3434 </html>
3435 --b1_76a486bee62b0d200f33dc2ca08220ad--
3436
3437 """).encode('utf-8')
3438 msg = email.message_from_bytes(source)
3439 s = BytesIO()
3440 g = email.generator.BytesGenerator(s)
3441 g.flatten(msg)
3442 self.assertEqual(s.getvalue(), source)
3443
    # Lift unittest's limit on the length of failure diffs so that
    # mismatches in the long message fixtures above are shown in full.
    maxDiff = None
3445
Ezio Melottib3aedd42010-11-20 19:04:17 +00003446
class BaseTestBytesGeneratorIdempotent:
    # Mixin that round-trips messages through BytesGenerator.  Concrete
    # subclasses supply:
    #   linesep                 - str passed to flatten() as the line
    #                             separator
    #   blinesep                - the same separator as bytes
    #   normalize_linesep_regex - compiled bytes pattern; every match in
    #                             the sample data is replaced by blinesep

    maxDiff = None

    def _msgobj(self, filename):
        # Load a sample file, normalize its line endings, and return both
        # the parsed message and the normalized bytes it was parsed from.
        with openfile(filename, 'rb') as stream:
            raw = stream.read()
        normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(normalized), normalized

    def _idempotent(self, msg, data, unixfrom=False):
        # Flatten msg without header re-wrapping (maxheaderlen=0) and check
        # that the output equals the bytes it was parsed from.
        sink = BytesIO()
        generator = email.generator.BytesGenerator(sink, maxheaderlen=0)
        generator.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertByteStringsEqual(data, sink.getvalue())

    def assertByteStringsEqual(self, str1, str2):
        # Splitting on a bare b'\n' rather than self.blinesep is
        # intentional: a failure caused by mixed line endings then shows up
        # much more readably in the list diff.
        first_lines = str1.split(b'\n')
        second_lines = str2.split(b'\n')
        self.assertListEqual(first_lines, second_lines)
3468
3469
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                     TestIdempotent):
    # LF flavour of the round-trip tests: _msgobj() rewrites every CRLF in
    # the sample data to a bare LF, and _idempotent() flattens with '\n'.
    normalize_linesep_regex = re.compile(br'\r\n')
    blinesep = b'\n'
    linesep = '\n'
3475
3476
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    # CRLF flavour of the round-trip tests: _msgobj() rewrites every LF
    # that is not already preceded by CR to CRLF, and _idempotent()
    # flattens with '\r\n'.
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
    blinesep = b'\r\n'
    linesep = '\r\n'
3482
Ezio Melottib3aedd42010-11-20 19:04:17 +00003483
class TestBase64(unittest.TestCase):
    def test_len(self):
        check = self.assertEqual
        # header_length() must agree with the length of a real encoding.
        check(base64mime.header_length('hello'),
              len(base64mime.body_encode(b'hello', eol='')))
        # Base64 produces four output characters for every started group of
        # three input characters: 0 -> 0, 1..3 -> 4, 4..6 -> 8, and so on.
        for size in range(15):
            bsize = 4 * ((size + 2) // 3)
            check(base64mime.header_length('x' * size), bsize)

    def test_decode(self):
        check = self.assertEqual
        for encoded, expected in (('', b''), ('aGVsbG8=', b'hello')):
            check(base64mime.decode(encoded), expected)

    def test_encode(self):
        check = self.assertEqual
        encode = base64mime.body_encode
        check(encode(b''), b'')
        check(encode(b'hello'), 'aGVsbG8=\n')
        # Test the binary flag
        check(encode(b'hello\n'), 'aGVsbG8K\n')
        # 100 input bytes wrapped at 40 output columns: three full lines of
        # 30 input bytes each plus one short final line.
        full_line = 'eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg'
        last_line = 'eHh4eCB4eHh4IA=='
        wrapped = [full_line, full_line, full_line, last_line]
        # Test the maxlinelen arg
        check(encode(b'xxxx ' * 20, maxlinelen=40),
              ''.join(line + '\n' for line in wrapped))
        # Test the eol argument
        check(encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'),
              ''.join(line + '\r\n' for line in wrapped))

    def test_header_encode(self):
        check = self.assertEqual
        encode = base64mime.header_encode
        check(encode('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        check(encode('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        check(encode('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # Test the charset option
        check(encode('hello', charset='iso-8859-2'),
              '=?iso-8859-2?b?aGVsbG8=?=')
        check(encode('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
3535
Ezio Melottib3aedd42010-11-20 19:04:17 +00003536
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003537class TestQuopri(unittest.TestCase):
3538 def setUp(self):
3539 # Set of characters (as byte integers) that don't need to be encoded
3540 # in headers.
3541 self.hlit = list(chain(
3542 range(ord('a'), ord('z') + 1),
3543 range(ord('A'), ord('Z') + 1),
3544 range(ord('0'), ord('9') + 1),
Guido van Rossum9604e662007-08-30 03:46:43 +00003545 (c for c in b'!*+-/')))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003546 # Set of characters (as byte integers) that do need to be encoded in
3547 # headers.
3548 self.hnon = [c for c in range(256) if c not in self.hlit]
3549 assert len(self.hlit) + len(self.hnon) == 256
3550 # Set of characters (as byte integers) that don't need to be encoded
3551 # in bodies.
3552 self.blit = list(range(ord(' '), ord('~') + 1))
3553 self.blit.append(ord('\t'))
3554 self.blit.remove(ord('='))
3555 # Set of characters (as byte integers) that do need to be encoded in
3556 # bodies.
3557 self.bnon = [c for c in range(256) if c not in self.blit]
3558 assert len(self.blit) + len(self.bnon) == 256
3559
Guido van Rossum9604e662007-08-30 03:46:43 +00003560 def test_quopri_header_check(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003561 for c in self.hlit:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003562 self.assertFalse(quoprimime.header_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003563 'Should not be header quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003564 for c in self.hnon:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003565 self.assertTrue(quoprimime.header_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003566 'Should be header quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003567
Guido van Rossum9604e662007-08-30 03:46:43 +00003568 def test_quopri_body_check(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003569 for c in self.blit:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003570 self.assertFalse(quoprimime.body_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003571 'Should not be body quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003572 for c in self.bnon:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003573 self.assertTrue(quoprimime.body_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003574 'Should be body quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003575
3576 def test_header_quopri_len(self):
3577 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00003578 eq(quoprimime.header_length(b'hello'), 5)
3579 # RFC 2047 chrome is not included in header_length().
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003580 eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
Guido van Rossum9604e662007-08-30 03:46:43 +00003581 quoprimime.header_length(b'hello') +
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003582 # =?xxx?q?...?= means 10 extra characters
3583 10)
Guido van Rossum9604e662007-08-30 03:46:43 +00003584 eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
3585 # RFC 2047 chrome is not included in header_length().
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003586 eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
Guido van Rossum9604e662007-08-30 03:46:43 +00003587 quoprimime.header_length(b'h@e@l@l@o@') +
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003588 # =?xxx?q?...?= means 10 extra characters
3589 10)
3590 for c in self.hlit:
Guido van Rossum9604e662007-08-30 03:46:43 +00003591 eq(quoprimime.header_length(bytes([c])), 1,
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003592 'expected length 1 for %r' % chr(c))
3593 for c in self.hnon:
Guido van Rossum9604e662007-08-30 03:46:43 +00003594 # Space is special; it's encoded to _
3595 if c == ord(' '):
3596 continue
3597 eq(quoprimime.header_length(bytes([c])), 3,
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003598 'expected length 3 for %r' % chr(c))
Guido van Rossum9604e662007-08-30 03:46:43 +00003599 eq(quoprimime.header_length(b' '), 1)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003600
3601 def test_body_quopri_len(self):
3602 eq = self.assertEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003603 for c in self.blit:
Guido van Rossum9604e662007-08-30 03:46:43 +00003604 eq(quoprimime.body_length(bytes([c])), 1)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003605 for c in self.bnon:
Guido van Rossum9604e662007-08-30 03:46:43 +00003606 eq(quoprimime.body_length(bytes([c])), 3)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003607
3608 def test_quote_unquote_idempotent(self):
3609 for x in range(256):
3610 c = chr(x)
3611 self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)
3612
R David Murrayec1b5b82011-03-23 14:19:05 -04003613 def _test_header_encode(self, header, expected_encoded_header, charset=None):
3614 if charset is None:
3615 encoded_header = quoprimime.header_encode(header)
3616 else:
3617 encoded_header = quoprimime.header_encode(header, charset)
3618 self.assertEqual(encoded_header, expected_encoded_header)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003619
R David Murraycafd79d2011-03-23 15:25:55 -04003620 def test_header_encode_null(self):
3621 self._test_header_encode(b'', '')
3622
R David Murrayec1b5b82011-03-23 14:19:05 -04003623 def test_header_encode_one_word(self):
3624 self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=')
3625
3626 def test_header_encode_two_lines(self):
3627 self._test_header_encode(b'hello\nworld',
3628 '=?iso-8859-1?q?hello=0Aworld?=')
3629
3630 def test_header_encode_non_ascii(self):
3631 self._test_header_encode(b'hello\xc7there',
3632 '=?iso-8859-1?q?hello=C7there?=')
3633
3634 def test_header_encode_alt_charset(self):
3635 self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
3636 charset='iso-8859-2')
3637
3638 def _test_header_decode(self, encoded_header, expected_decoded_header):
3639 decoded_header = quoprimime.header_decode(encoded_header)
3640 self.assertEqual(decoded_header, expected_decoded_header)
3641
3642 def test_header_decode_null(self):
3643 self._test_header_decode('', '')
3644
3645 def test_header_decode_one_word(self):
3646 self._test_header_decode('hello', 'hello')
3647
3648 def test_header_decode_two_lines(self):
3649 self._test_header_decode('hello=0Aworld', 'hello\nworld')
3650
3651 def test_header_decode_non_ascii(self):
3652 self._test_header_decode('hello=C7there', 'hello\xc7there')
3653
3654 def _test_decode(self, encoded, expected_decoded, eol=None):
3655 if eol is None:
3656 decoded = quoprimime.decode(encoded)
3657 else:
3658 decoded = quoprimime.decode(encoded, eol=eol)
3659 self.assertEqual(decoded, expected_decoded)
3660
3661 def test_decode_null_word(self):
3662 self._test_decode('', '')
3663
3664 def test_decode_null_line_null_word(self):
3665 self._test_decode('\r\n', '\n')
3666
3667 def test_decode_one_word(self):
3668 self._test_decode('hello', 'hello')
3669
3670 def test_decode_one_word_eol(self):
3671 self._test_decode('hello', 'hello', eol='X')
3672
3673 def test_decode_one_line(self):
3674 self._test_decode('hello\r\n', 'hello\n')
3675
3676 def test_decode_one_line_lf(self):
3677 self._test_decode('hello\n', 'hello\n')
3678
R David Murraycafd79d2011-03-23 15:25:55 -04003679 def test_decode_one_line_cr(self):
3680 self._test_decode('hello\r', 'hello\n')
3681
3682 def test_decode_one_line_nl(self):
3683 self._test_decode('hello\n', 'helloX', eol='X')
3684
3685 def test_decode_one_line_crnl(self):
3686 self._test_decode('hello\r\n', 'helloX', eol='X')
3687
R David Murrayec1b5b82011-03-23 14:19:05 -04003688 def test_decode_one_line_one_word(self):
3689 self._test_decode('hello\r\nworld', 'hello\nworld')
3690
3691 def test_decode_one_line_one_word_eol(self):
3692 self._test_decode('hello\r\nworld', 'helloXworld', eol='X')
3693
3694 def test_decode_two_lines(self):
3695 self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n')
3696
R David Murraycafd79d2011-03-23 15:25:55 -04003697 def test_decode_two_lines_eol(self):
3698 self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X')
3699
R David Murrayec1b5b82011-03-23 14:19:05 -04003700 def test_decode_one_long_line(self):
3701 self._test_decode('Spam' * 250, 'Spam' * 250)
3702
3703 def test_decode_one_space(self):
3704 self._test_decode(' ', '')
3705
3706 def test_decode_multiple_spaces(self):
3707 self._test_decode(' ' * 5, '')
3708
3709 def test_decode_one_line_trailing_spaces(self):
3710 self._test_decode('hello \r\n', 'hello\n')
3711
3712 def test_decode_two_lines_trailing_spaces(self):
3713 self._test_decode('hello \r\nworld \r\n', 'hello\nworld\n')
3714
3715 def test_decode_quoted_word(self):
3716 self._test_decode('=22quoted=20words=22', '"quoted words"')
3717
3718 def test_decode_uppercase_quoting(self):
3719 self._test_decode('ab=CD=EF', 'ab\xcd\xef')
3720
3721 def test_decode_lowercase_quoting(self):
3722 self._test_decode('ab=cd=ef', 'ab\xcd\xef')
3723
3724 def test_decode_soft_line_break(self):
3725 self._test_decode('soft line=\r\nbreak', 'soft linebreak')
3726
3727 def test_decode_false_quoting(self):
3728 self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2')
3729
3730 def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
3731 kwargs = {}
3732 if maxlinelen is None:
3733 # Use body_encode's default.
3734 maxlinelen = 76
3735 else:
3736 kwargs['maxlinelen'] = maxlinelen
3737 if eol is None:
3738 # Use body_encode's default.
3739 eol = '\n'
3740 else:
3741 kwargs['eol'] = eol
3742 encoded_body = quoprimime.body_encode(body, **kwargs)
3743 self.assertEqual(encoded_body, expected_encoded_body)
3744 if eol == '\n' or eol == '\r\n':
3745 # We know how to split the result back into lines, so maxlinelen
3746 # can be checked.
3747 for line in encoded_body.splitlines():
3748 self.assertLessEqual(len(line), maxlinelen)
3749
3750 def test_encode_null(self):
3751 self._test_encode('', '')
3752
3753 def test_encode_null_lines(self):
3754 self._test_encode('\n\n', '\n\n')
3755
3756 def test_encode_one_line(self):
3757 self._test_encode('hello\n', 'hello\n')
3758
3759 def test_encode_one_line_crlf(self):
3760 self._test_encode('hello\r\n', 'hello\n')
3761
3762 def test_encode_one_line_eol(self):
3763 self._test_encode('hello\n', 'hello\r\n', eol='\r\n')
3764
3765 def test_encode_one_space(self):
3766 self._test_encode(' ', '=20')
3767
3768 def test_encode_one_line_one_space(self):
3769 self._test_encode(' \n', '=20\n')
3770
R David Murrayb938c8c2011-03-24 12:19:26 -04003771# XXX: body_encode() expect strings, but uses ord(char) from these strings
3772# to index into a 256-entry list. For code points above 255, this will fail.
3773# Should there be a check for 8-bit only ord() values in body, or at least
3774# a comment about the expected input?
3775
3776 def test_encode_two_lines_one_space(self):
3777 self._test_encode(' \n \n', '=20\n=20\n')
3778
R David Murrayec1b5b82011-03-23 14:19:05 -04003779 def test_encode_one_word_trailing_spaces(self):
3780 self._test_encode('hello ', 'hello =20')
3781
3782 def test_encode_one_line_trailing_spaces(self):
3783 self._test_encode('hello \n', 'hello =20\n')
3784
3785 def test_encode_one_word_trailing_tab(self):
3786 self._test_encode('hello \t', 'hello =09')
3787
3788 def test_encode_one_line_trailing_tab(self):
3789 self._test_encode('hello \t\n', 'hello =09\n')
3790
3791 def test_encode_trailing_space_before_maxlinelen(self):
3792 self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6)
3793
R David Murrayb938c8c2011-03-24 12:19:26 -04003794 def test_encode_trailing_space_at_maxlinelen(self):
3795 self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5)
3796
R David Murrayec1b5b82011-03-23 14:19:05 -04003797 def test_encode_trailing_space_beyond_maxlinelen(self):
R David Murrayb938c8c2011-03-24 12:19:26 -04003798 self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4)
3799
3800 def test_encode_whitespace_lines(self):
3801 self._test_encode(' \n' * 5, '=20\n' * 5)
R David Murrayec1b5b82011-03-23 14:19:05 -04003802
3803 def test_encode_quoted_equals(self):
3804 self._test_encode('a = b', 'a =3D b')
3805
3806 def test_encode_one_long_string(self):
3807 self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25)
3808
3809 def test_encode_one_long_line(self):
3810 self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')
3811
3812 def test_encode_one_very_long_line(self):
3813 self._test_encode('x' * 200 + '\n',
3814 2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n')
3815
3816 def test_encode_one_long_line(self):
3817 self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')
3818
3819 def test_encode_shortest_maxlinelen(self):
3820 self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003821
R David Murrayb938c8c2011-03-24 12:19:26 -04003822 def test_encode_maxlinelen_too_small(self):
3823 self.assertRaises(ValueError, self._test_encode, '', '', maxlinelen=3)
3824
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003825 def test_encode(self):
3826 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00003827 eq(quoprimime.body_encode(''), '')
3828 eq(quoprimime.body_encode('hello'), 'hello')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003829 # Test the binary flag
Guido van Rossum9604e662007-08-30 03:46:43 +00003830 eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003831 # Test the maxlinelen arg
Guido van Rossum9604e662007-08-30 03:46:43 +00003832 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003833xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
3834 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
3835x xxxx xxxx xxxx xxxx=20""")
3836 # Test the eol argument
Guido van Rossum9604e662007-08-30 03:46:43 +00003837 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
3838 """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003839xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
3840 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
3841x xxxx xxxx xxxx xxxx=20""")
Guido van Rossum9604e662007-08-30 03:46:43 +00003842 eq(quoprimime.body_encode("""\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003843one line
3844
3845two line"""), """\
3846one line
3847
3848two line""")
3849
3850
Ezio Melottib3aedd42010-11-20 19:04:17 +00003851
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003852# Test the Charset class
class TestCharset(unittest.TestCase):
    """Header- and body-encoding checks for email.charset.Charset."""

    def tearDown(self):
        # test_body_encode registers a throwaway 'fake' charset in the
        # module-level registry; drop it (when present) after every test.
        from email import charset as CharsetModule
        CharsetModule.CHARSETS.pop('fake', None)

    def test_codec_encodeable(self):
        ascii_charset = Charset('us-ascii')
        # Plain ASCII text goes through us-ascii unchanged.
        self.assertEqual(ascii_charset.header_encode('Hello World!'),
                         'Hello World!')
        # Non-ASCII text cannot be header-encoded with us-ascii ...
        eight_bit = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
        with self.assertRaises(UnicodeError):
            ascii_charset.header_encode(eight_bit)
        # ... but the same text becomes a base64 encoded-word under utf-8.
        utf8_charset = Charset('utf-8')
        self.assertEqual(utf8_charset.header_encode(eight_bit),
                         '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')

    def test_body_encode(self):
        # (charset name, input, expected encoded body)
        cases = (
            # A charset with QP body encoding
            ('iso-8859-1', 'hello w\xf6rld', 'hello w=F6rld'),
            # A charset with Base64 body encoding
            ('utf-8', b'hello world', 'aGVsbG8gd29ybGQ=\n'),
            # A charset with None body encoding
            ('us-ascii', 'hello world', 'hello world'),
        )
        for charset_name, raw, expected in cases:
            self.assertEqual(expected, Charset(charset_name).body_encode(raw))
        # Try the convert argument, where input codec != output codec.
        # Only the constructor is exercised; the assertions are disabled.
        Charset('euc-jp')
        # With apologies to Tokio Kikuchi ;)
        # XXX FIXME
##         try:
##             eq('\x1b$B5FCO;~IW\x1b(B',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
##             eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
##         except LookupError:
##             # We probably don't have the Japanese codecs installed
##             pass
        # Testing SF bug #625509, which we have to fake, since there are no
        # built-in encodings where the header encoding is QP but the body
        # encoding is not.
        from email import charset as CharsetModule
        CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8')
        fake_charset = Charset('fake')
        self.assertEqual('hello world', fake_charset.body_encode('hello world'))

    def test_unicode_charset_name(self):
        # An ASCII name round-trips through str(); a non-ASCII name is
        # rejected with CharsetError.
        ascii_charset = Charset('us-ascii')
        self.assertEqual(str(ascii_charset), 'us-ascii')
        with self.assertRaises(errors.CharsetError):
            Charset('asc\xffii')
3907
3908
Ezio Melottib3aedd42010-11-20 19:04:17 +00003909
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003910# Test multilingual MIME headers.
class TestHeader(TestEmailBase):
    """Tests for email.header.Header: encoding, splitting and decoding."""

    def test_simple(self):
        # Appending a chunk that already starts with a space yields a single
        # separating space in the encoded header.
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        h.append(' Goodbye World!')
        eq(h.encode(), 'Hello World!  Goodbye World!'.replace('  ', ' '))

    def test_simple_surprise(self):
        # Appending a chunk without a leading space still produces a
        # separating space between the two chunks.
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        h.append('Goodbye World!')
        eq(h.encode(), 'Hello World! Goodbye World!')

    def test_header_needs_no_decoding(self):
        # A plain ASCII string decodes to itself with no charset.
        h = 'no decoding needed'
        self.assertEqual(decode_header(h), [(h, None)])

    def test_long(self):
        # Every folded line of a long header must fit within maxlinelen.
        h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
                   maxlinelen=76)
        for line in h.encode(splitchars=' ').split('\n '):
            # assertLessEqual reports both operands when the check fails.
            self.assertLessEqual(len(line), 76)

    def test_multilingual(self):
        # Three chunks in three charsets are encoded into one folded header,
        # then decoded back and rebuilt with make_header().
        eq = self.ndiffAssertEqual
        g = Charset("iso-8859-1")
        cz = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        g_head = (b'Die Mieter treten hier ein werden mit einem '
                  b'Foerderband komfortabel den Korridor entlang, '
                  b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
                  b'gegen die rotierenden Klingen bef\xf6rdert. ')
        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        h = Header(g_head, g)
        h.append(cz_head, cz)
        h.append(utf8_head, utf8)
        enc = h.encode(maxlinelen=76)
        eq(enc, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
        decoded = decode_header(enc)
        eq(len(decoded), 3)
        eq(decoded[0], (g_head, 'iso-8859-1'))
        eq(decoded[1], (cz_head, 'iso-8859-2'))
        eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
        ustr = str(h)
        eq(ustr,
           (b'Die Mieter treten hier ein werden mit einem Foerderband '
            b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
            b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
            b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
            b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
            b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
            b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
            b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
            b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
            b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
            b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
            b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
            b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
            b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
            b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
            b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
            ).decode('utf-8'))
        # Test make_header()
        newh = make_header(decode_header(enc))
        eq(newh, h)

    def test_empty_header_encode(self):
        # An empty Header encodes to the empty string.
        h = Header()
        self.assertEqual(h.encode(), '')

    def test_header_ctor_default_args(self):
        # A default-constructed Header compares equal to '' and to the text
        # appended to it afterwards.
        eq = self.ndiffAssertEqual
        h = Header()
        eq(h, '')
        h.append('foo', Charset('iso-8859-1'))
        eq(h, 'foo')

    def test_explicit_maxlinelen(self):
        # The fold point moves when header_name is given, and no folding
        # happens at all once maxlinelen exceeds the header length.
        eq = self.ndiffAssertEqual
        hstr = ('A very long line that must get split to something other '
                'than at the 76th character boundary to test the non-default '
                'behavior')
        h = Header(hstr)
        eq(h.encode(), '''\
A very long line that must get split to something other than at the 76th
 character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        h = Header(hstr, header_name='Subject')
        eq(h.encode(), '''\
A very long line that must get split to something other than at the
 76th character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        h = Header(hstr, maxlinelen=1024, header_name='Subject')
        eq(h.encode(), hstr)
        eq(str(h), hstr)

    def test_quopri_splittable(self):
        # Quoted-printable encoded words are split to honour maxlinelen and
        # must decode back to the original text.
        eq = self.ndiffAssertEqual
        h = Header(charset='iso-8859-1', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_?=""")
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='iso-8859-1', maxlinelen=40)
        # Reuse x so the round-trip check below compares against exactly
        # the text that was appended.
        h.append(x)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
 =?iso-8859-1?q?_xxxx_xxxx_?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_base64_splittable(self):
        # Base64 encoded words are split to honour maxlinelen and must
        # decode back to the original text.
        eq = self.ndiffAssertEqual
        h = Header(charset='koi8-r', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IA==?=""")
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='koi8-r', maxlinelen=40)
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_us_ascii_header(self):
        # A plain string survives decode_header() -> make_header() -> encode().
        eq = self.assertEqual
        s = 'hello'
        x = decode_header(s)
        eq(x, [('hello', None)])
        h = make_header(x)
        eq(s, h.encode())

    def test_string_charset(self):
        # append() accepts the charset as a plain string name.
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        eq(h, 'hello')

    # Disabled test kept for reference (note the Python 2 style u'' literals).
##    def test_unicode_error(self):
##        raises = self.assertRaises
##        raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
##        h = Header()
##        raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')

    def test_utf8_shortest(self):
        # utf-8 headers come out as q- or b-encoded words depending on the
        # text, as pinned by the two expected values below.
        eq = self.assertEqual
        h = Header('p\xf6stal', 'utf-8')
        eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
        h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
        eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')

    def test_bad_8bit_header(self):
        # Undecodable bytes raise UnicodeError unless errors='replace' is
        # passed, in which case they are replaced.
        raises = self.assertRaises
        eq = self.assertEqual
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        raises(UnicodeError, Header, x)
        h = Header()
        raises(UnicodeError, h.append, x)
        e = x.decode('utf-8', 'replace')
        eq(str(Header(x, errors='replace')), e)
        h.append(x, errors='replace')
        eq(str(h), e)

    def test_escaped_8bit_header(self):
        # Surrogate-escaped text tagged as unknown-8bit stringifies with
        # U+FFFD and decodes back to the original bytes.
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        e = x.decode('ascii', 'surrogateescape')
        h = Header(e, charset=email.charset.UNKNOWN8BIT)
        self.assertEqual(str(h),
                        'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])

    def test_header_handles_binary_unknown8bit(self):
        # Same expectations as above, but starting from raw bytes.
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        h = Header(x, charset=email.charset.UNKNOWN8BIT)
        self.assertEqual(str(h),
                        'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])

    def test_make_header_handles_binary_unknown8bit(self):
        # An unknown-8bit header must survive a decode_header()/make_header()
        # round trip.
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        h = Header(x, charset=email.charset.UNKNOWN8BIT)
        h2 = email.header.make_header(email.header.decode_header(h))
        self.assertEqual(str(h2),
                        'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h2), [(x, 'unknown-8bit')])

    def test_modify_returned_list_does_not_change_header(self):
        # Mutating the list returned by decode_header() must not affect
        # the Header it was produced from.
        h = Header('test')
        chunks = email.header.decode_header(h)
        chunks.append(('ascii', 'test2'))
        self.assertEqual(str(h), 'test')

    def test_encoded_adjacent_nonencoded(self):
        # An encoded word followed by plain text keeps the separating space
        # and round-trips through decode_header()/make_header().
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        h.append('world')
        s = h.encode()
        eq(s, '=?iso-8859-1?q?hello?= world')
        h = make_header(decode_header(s))
        eq(h.encode(), s)

    def test_whitespace_eater(self):
        # Whitespace between two adjacent encoded words is dropped, so the
        # two koi8-r words decode to a single chunk.
        eq = self.assertEqual
        s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
        parts = decode_header(s)
        eq(parts, [(b'Subject:', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b'zz.', None)])
        hdr = make_header(parts)
        eq(hdr.encode(),
           'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')

    def test_broken_base64_header(self):
        # A malformed base64 encoded word raises HeaderParseError.
        raises = self.assertRaises
        s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
        raises(errors.HeaderParseError, decode_header, s)

    def test_shift_jis_charset(self):
        # shift_jis input is emitted as an iso-2022-jp encoded word.
        h = Header('文', charset='shift_jis')
        self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')

    def test_flatten_header_with_no_value(self):
        # Issue 11401 (regression from email 4.x)  Note that the space after
        # the header doesn't reflect the input, but this is also the way
        # email 4.x behaved.  At some point it would be nice to fix that.
        msg = email.message_from_string("EmptyHeader:")
        self.assertEqual(str(msg), "EmptyHeader: \n\n")

    def test_encode_preserves_leading_ws_on_value(self):
        value = ' value with leading ws'
        msg = Message()
        msg['SomeHeader'] = value
        # Build the expectation from the value itself ("name: " separator
        # followed by the untouched value) so the assertion fails if the
        # value's own leading whitespace is dropped.
        self.assertEqual(str(msg), 'SomeHeader: ' + value + '\n\n')
4262
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004263
Ezio Melottib3aedd42010-11-20 19:04:17 +00004264
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004265# Test RFC 2231 header parameters (en/de)coding
class TestRFC2231(TestEmailBase):
    """Tests for RFC 2231 parameter encoding, decoding and continuations."""

    def test_get_param(self):
        # An RFC 2231 encoded parameter comes back as a
        # (charset, language, value) tuple, optionally still quoted.
        eq = self.assertEqual
        msg = self._msgobj('msg_29.txt')
        eq(msg.get_param('title'),
           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        eq(msg.get_param('title', unquote=False),
           ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))

    def test_set_param(self):
        # set_param() with charset/language stores an RFC 2231 value and
        # flattens it as a title*= extended parameter.
        eq = self.ndiffAssertEqual
        msg = Message()
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii')
        eq(msg.get_param('title'),
           ('us-ascii', '', 'This is even more ***fun*** isn\'t it!'))
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        eq(msg.get_param('title'),
           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        msg = self._msgobj('msg_01.txt')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    def test_set_param_requote(self):
        # requote=False leaves plain tokens unquoted ...
        msg = Message()
        msg.set_param('title', 'foo')
        self.assertEqual(msg['content-type'], 'text/plain; title="foo"')
        msg.set_param('title', 'bar', requote=False)
        self.assertEqual(msg['content-type'], 'text/plain; title=bar')
        # tspecial is still quoted.
        msg.set_param('title', "(bar)bell", requote=False)
        self.assertEqual(msg['content-type'], 'text/plain; title="(bar)bell"')

    def test_del_param(self):
        # Deleting one parameter leaves the remaining RFC 2231 parameter
        # intact in the flattened message.
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        msg.set_param('foo', 'bar', charset='us-ascii', language='en')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        msg.del_param('foo', header='Content-Type')
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    def test_rfc2231_get_content_charset(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_32.txt')
        eq(msg.get_content_charset(), 'us-ascii')

    def test_rfc2231_parse_rfc_quoting(self):
        # Unquoted encoded segments parse correctly and the message
        # flattens back to the exact input.
        m = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*=''This%20is%20even%20more%20;
            \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        self.assertEqual(m, msg.as_string())

    def test_rfc2231_parse_extra_quoting(self):
        # Same as above, but with the encoded segments wrapped in quotes.
        m = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*="''This%20is%20even%20more%20";
            \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        self.assertEqual(m, msg.as_string())

    def test_rfc2231_no_language_or_charset(self):
        # Plain (non-extended) continuations are joined into a str, not
        # returned as a (charset, language, value) tuple.
        m = '''\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm

'''
        msg = email.message_from_string(m)
        param = msg.get_param('NAME')
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(
            param,
            'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')

    def test_rfc2231_no_language_or_charset_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
        # NOTE(review): input and expectation are identical to
        # test_rfc2231_no_language_or_charset_in_filename -- confirm whether
        # a different input was intended here.
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_partly_encoded(self):
        # Only segments marked with a trailing '*' are percent-decoded.
        m = '''\
Content-Disposition: inline;
\tfilename*0="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20***fun*** is it not.pdf')

    def test_rfc2231_partly_nonencoded(self):
        # With no segment marked as encoded, nothing is percent-decoded.
        m = '''\
Content-Disposition: inline;
\tfilename*0="This%20is%20even%20more%20";
\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_boundary(self):
        m = '''\
Content-Type: multipart/alternative;
\tboundary*0*="''This%20is%20even%20more%20";
\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tboundary*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_boundary(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_charset(self):
        # This is a nonsensical charset value, but tests the code anyway
        m = '''\
Content-Type: text/plain;
\tcharset*0*="This%20is%20even%20more%20";
\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tcharset*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_content_charset(),
                         'this is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_filename(self):
        # An unknown charset label does not prevent decoding the filename.
        m = '''\
Content-Disposition: inline;
\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_charset(self):
        m = """\
Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D

"""
        msg = email.message_from_string(m)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertEqual(msg.get_content_charset(), None)

    def test_rfc2231_bad_character_in_charset(self):
        m = """\
Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D

"""
        msg = email.message_from_string(m)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertEqual(msg.get_content_charset(), None)

    def test_rfc2231_bad_character_in_filename(self):
        # A byte that is invalid for the declared charset shows up as
        # U+FFFD in the decoded filename.
        m = '''\
Content-Disposition: inline;
\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2*="is it not.pdf%E2"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf\ufffd')

    def test_rfc2231_unknown_encoding(self):
        m = """\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt

"""
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(), 'myfile.txt')

    def test_rfc2231_single_tick_in_filename_extended(self):
        # A lone apostrophe in an extended value is not taken as a
        # charset/language delimiter.
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, None)
        eq(language, None)
        eq(s, "Frank's Document")

    def test_rfc2231_single_tick_in_filename(self):
        m = """\
Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(m)
        param = msg.get_param('name')
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(param, "Frank's Document")

    def test_rfc2231_tick_attack_extended(self):
        # Only the first two apostrophes delimit charset and language; the
        # third one belongs to the value.
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, "Frank's Document")

    def test_rfc2231_tick_attack(self):
        # In a non-extended value the apostrophes are plain text.
        m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(m)
        param = msg.get_param('name')
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(param, "us-ascii'en-us'Frank's Document")

    def test_rfc2231_no_extended_values(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo; name=\"Frank's Document\"

"""
        msg = email.message_from_string(m)
        eq(msg.get_param('name'), "Frank's Document")

    def test_rfc2231_encoded_then_unencoded_segments(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'My\";
\tname*1=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, 'My Document For You')

    def test_rfc2231_unencoded_then_encoded_segments(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'My\";
\tname*1*=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, 'My Document For You')
4610
4611
Ezio Melottib3aedd42010-11-20 19:04:17 +00004612
R. David Murraya8f480f2010-01-16 18:30:03 +00004613# Tests to ensure that signed parts of an email are completely preserved, as
4614# required by RFC1847 section 2.1. Note that these are incomplete, because the
4615# email package does not currently always preserve the body. See issue 1670765.
class TestSigned(TestEmailBase):
    """Check that the first MIME part of a signed message is preserved."""

    def _msg_and_obj(self, filename):
        # Return the raw text of the data file together with the message
        # object parsed from that text.
        with openfile(findfile(filename)) as stream:
            text = stream.read()
            parsed = email.message_from_string(text)
        return text, parsed

    def _signed_parts_eq(self, original, result):
        # Extract the first mime part of each message: the text between a
        # "--boundary" line and the next line repeating the same boundary.
        first_part = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)
        expected = first_part.search(original).group(2)
        actual = first_part.search(result).group(2)
        self.assertEqual(actual, expected)

    def test_long_headers_as_string(self):
        source_text, message = self._msg_and_obj('msg_45.txt')
        self._signed_parts_eq(source_text, message.as_string())

    def test_long_headers_as_string_maxheaderlen(self):
        source_text, message = self._msg_and_obj('msg_45.txt')
        self._signed_parts_eq(source_text, message.as_string(maxheaderlen=60))

    def test_long_headers_flatten(self):
        source_text, message = self._msg_and_obj('msg_45.txt')
        buffer = StringIO()
        Generator(buffer).flatten(message)
        self._signed_parts_eq(source_text, buffer.getvalue())
4648
4649
Ezio Melottib3aedd42010-11-20 19:04:17 +00004650
def _testclasses():
    """Return every attribute of this module whose name starts with 'Test'.

    The attributes are returned in the (sorted) order produced by dir().
    """
    this_module = sys.modules[__name__]
    found = []
    for attr_name in dir(this_module):
        if attr_name.startswith('Test'):
            found.append(getattr(this_module, attr_name))
    return found
4654
4655
def suite():
    """Return one TestSuite holding the tests of every Test* class here.

    The classes come from _testclasses(). Tests are collected with
    TestLoader.loadTestsFromTestCase rather than unittest.makeSuite, which
    is deprecated and no longer available in recent Python versions.
    """
    loader = unittest.defaultTestLoader
    # Named all_tests so the local does not shadow this function's own name.
    all_tests = unittest.TestSuite()
    for testclass in _testclasses():
        all_tests.addTest(loader.loadTestsFromTestCase(testclass))
    return all_tests
4661
4662
def test_main():
    """Run each class returned by _testclasses() through run_unittest()."""
    discovered = _testclasses()
    for case_class in discovered:
        run_unittest(case_class)
4666
4667
Ezio Melottib3aedd42010-11-20 19:04:17 +00004668
# When executed as a script, let unittest run the suite built by suite().
if __name__ == '__main__':
    unittest.main(defaultTest='suite')