blob: 281a65a6da4df693b5617fe2b04024c335912d7d [file] [log] [blame]
# Copyright (C) 2001-2010 Python Software Foundation
# Contact: email-sig@python.org
# email package unit tests
4
5import os
R. David Murray719a4492010-11-21 16:53:48 +00006import re
Guido van Rossum8b3febe2007-08-30 01:15:14 +00007import sys
8import time
9import base64
10import difflib
11import unittest
12import warnings
R. David Murray96fd54e2010-10-08 15:55:28 +000013import textwrap
Guido van Rossum8b3febe2007-08-30 01:15:14 +000014
R. David Murray96fd54e2010-10-08 15:55:28 +000015from io import StringIO, BytesIO
Guido van Rossum8b3febe2007-08-30 01:15:14 +000016from itertools import chain
17
18import email
19
20from email.charset import Charset
21from email.header import Header, decode_header, make_header
22from email.parser import Parser, HeaderParser
23from email.generator import Generator, DecodedGenerator
24from email.message import Message
25from email.mime.application import MIMEApplication
26from email.mime.audio import MIMEAudio
27from email.mime.text import MIMEText
28from email.mime.image import MIMEImage
29from email.mime.base import MIMEBase
30from email.mime.message import MIMEMessage
31from email.mime.multipart import MIMEMultipart
32from email import utils
33from email import errors
34from email import encoders
35from email import iterators
36from email import base64mime
37from email import quoprimime
38
R. David Murray96fd54e2010-10-08 15:55:28 +000039from test.support import findfile, run_unittest, unlink
Guido van Rossum8b3febe2007-08-30 01:15:14 +000040from email.test import __file__ as landmark
41
42
# Small string constants shared by the tests in this module.
NL = '\n'
EMPTYSTRING = ''
SPACE = ' '
46
47
Ezio Melottib3aedd42010-11-20 19:04:17 +000048
def openfile(filename, *args, **kws):
    """Open *filename* from the 'data' directory next to ``landmark``.

    ``landmark`` is the ``__file__`` of the ``email.test`` package, so the
    data directory is resolved relative to that package.  Any extra
    positional or keyword arguments are passed straight through to open().
    """
    path = os.path.join(os.path.dirname(landmark), 'data', filename)
    return open(path, *args, **kws)
52
53
Ezio Melottib3aedd42010-11-20 19:04:17 +000054
# Base test class
class TestEmailBase(unittest.TestCase):
    """Base class providing helpers shared by the email test cases."""

    def ndiffAssertEqual(self, first, second):
        """Like assertEqual except use ndiff for readable output."""
        if first == second:
            return
        # Compare the repr() of each line so that whitespace and escape
        # differences are visible in the failure message.
        rfirst = [repr(line) for line in str(first).splitlines()]
        rsecond = [repr(line) for line in str(second).splitlines()]
        diff = difflib.ndiff(rfirst, rsecond)
        raise self.failureException(NL + NL.join(diff))

    def _msgobj(self, filename):
        """Parse the test data file *filename* and return the message."""
        with openfile(findfile(filename)) as fp:
            return email.message_from_file(fp)
70
71
Ezio Melottib3aedd42010-11-20 19:04:17 +000072
# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    """Tests for the Message class API: headers, parameters and payloads."""

    def test_get_all(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        # A missing header yields the supplied default.
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        self.assertIsNone(msg.get_charset())
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        self.assertIsNone(msg.get_charset())
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_get_charsets(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_make_boundary(self):
        msg = MIMEMultipart('form-data')
        # Note that when the boundary gets created is an implementation
        # detail and might change.
        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
        # Trigger creation of boundary
        msg.as_string()
        self.assertEqual(msg.items()[0][1][:33],
                         'multipart/form-data; boundary="==')
        # XXX: there ought to be tests of the uniqueness of the boundary, too.

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        with openfile(findfile('msg_46.txt')) as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, True, 0)
        gen.flatten(msg, False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        self.assertIsNone(msg.get_payload(decode=True))
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            # Assigning a header appends a new field and lookups return the
            # first match, so remove the previous value first; otherwise
            # only 'x-uuencode' would ever be exercised.
            del msg['content-transfer-encoding']
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_get_payload_n_raises_on_non_multipart(self):
        msg = Message()
        self.assertRaises(TypeError, msg.get_payload, 1)

    def test_decoded_generator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        self.assertIn('from', msg)
        self.assertIn('From', msg)
        self.assertIn('FROM', msg)
        self.assertIn('to', msg)
        self.assertIn('To', msg)
        self.assertIn('TO', msg)

    def test_as_string(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        eq(text, str(msg))
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        self.assertTrue(lines[0].startswith('From '))
        eq(text, NL.join(lines[1:]))

    def test_bad_param(self):
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_filename())

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_boundary())

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        self.assertIsNone(msg.get_params())
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    def test_get_param_liberal(self):
        msg = Message()
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        self.assertIsNone(msg.get_param('quuz', header='x-header'))
        self.assertIsNone(msg.get_param('quuz'))
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    def test_get_param_with_semis_in_quotes(self):
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    def test_get_param_with_quotes(self):
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        msg = email.message_from_string('Header: exists')
        self.assertIn('header', msg)
        self.assertIn('Header', msg)
        self.assertIn('HEADER', msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        # Re-adding the parameter places it after the remaining ones.
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_del_param_on_nonexistent_header(self):
        msg = Message()
        # No assertion: the test passes as long as this call does not raise.
        msg.del_param('filename', 'content-disposition')

    def test_del_nonexistent_param(self):
        msg = Message()
        msg.add_header('Content-Type', 'text/plain', charset='utf-8')
        existing_header = msg['Content-Type']
        msg.del_param('foobar', header='Content-Type')
        # Deleting a parameter that is not there must leave the header
        # exactly as it was before the call.
        self.assertEqual(msg['Content-Type'], existing_header)
        self.assertEqual(msg['Content-Type'], 'text/plain; charset="utf-8"')

    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        # With a duplicated header only the first occurrence is replaced.
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    def test_broken_base64_payload(self):
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    def test_broken_unicode_payload(self):
        # This test improves coverage but is not a compliance test.
        # The behavior in this situation is currently undefined by the API.
        x = 'this is a br\xf6ken thing to do'
        msg = Message()
        msg['content-type'] = 'text/plain'
        msg['content-transfer-encoding'] = '8bit'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    def test_questionable_bytes_payload(self):
        # This test improves coverage but is not a compliance test,
        # since it involves poking inside the black box.
        x = 'this is a quéstionable thing to do'.encode('utf-8')
        msg = Message()
        msg['content-type'] = 'text/plain; charset="utf-8"'
        msg['content-transfer-encoding'] = '8bit'
        msg._payload = x
        self.assertEqual(msg.get_payload(decode=True), x)

    # Issue 1078919
    def test_ascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    def test_noascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer.ppt")
        self.assertEqual(
            'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
            msg['Content-Disposition'])

    def test_nonascii_add_header_via_triple(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fußballer.ppt'))
        self.assertEqual(
            'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
            msg['Content-Disposition'])

    def test_ascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="windows [filename].ppt")
        self.assertEqual(
            'attachment; filename="windows [filename].ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer [filename].ppt")
        self.assertEqual(
            "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
            msg['Content-Disposition'])

    def test_add_header_with_name_only_param(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'inline', foo_bar=None)
        self.assertEqual("inline; foo-bar", msg['Content-Disposition'])

    def test_add_header_with_no_value(self):
        msg = Message()
        msg.add_header('X-Status', None)
        self.assertEqual('', msg['X-Status'])

    # Issue 5871: reject an attempt to embed a header inside a header value
    # (header injection attack).
    def test_embeded_header_via_Header_rejected(self):
        msg = Message()
        msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_embeded_header_via_string_rejected(self):
        msg = Message()
        msg['Dummy'] = 'dummy\nX-Injected-Header: test'
        self.assertRaises(errors.HeaderParseError, msg.as_string)
621
# Test the email.encoders module
class TestEncoders(unittest.TestCase):
    """Tests for the Content-Transfer-Encoding chosen for message bodies."""

    def test_EncodersEncode_base64(self):
        with openfile('PyBanner048.gif', 'rb') as fp:
            bindata = fp.read()
        mimed = email.mime.image.MIMEImage(bindata)
        base64ed = mimed.get_payload()
        # the transfer-encoded body lines should all be <=76 characters
        lines = base64ed.split('\n')
        self.assertLessEqual(max(len(x) for x in lines), 76)

    def test_encode_empty_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg['content-transfer-encoding'], '7bit')

    def test_default_cte(self):
        eq = self.assertEqual
        # 7bit data and the default us-ascii _charset
        msg = MIMEText('hello world')
        eq(msg['content-transfer-encoding'], '7bit')
        # Similar, but with 8bit data
        msg = MIMEText('hello \xf8 world')
        eq(msg['content-transfer-encoding'], '8bit')
        # And now with a different charset
        msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')

    def test_encode7or8bit(self):
        # Make sure a charset whose input character set is 8bit but
        # whose output character set is 7bit gets a transfer-encoding
        # of 7bit.
        eq = self.assertEqual
        msg = MIMEText('文', _charset='euc-jp')
        eq(msg['content-transfer-encoding'], '7bit')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000659
Ezio Melottib3aedd42010-11-20 19:04:17 +0000660
# Test long header wrapping
662class TestLongHeaders(TestEmailBase):
R David Murray01581ee2011-04-18 10:04:34 -0400663
664 maxDiff = None
665
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000666 def test_split_long_continuation(self):
667 eq = self.ndiffAssertEqual
668 msg = email.message_from_string("""\
669Subject: bug demonstration
670\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
671\tmore text
672
673test
674""")
675 sfp = StringIO()
676 g = Generator(sfp)
677 g.flatten(msg)
678 eq(sfp.getvalue(), """\
679Subject: bug demonstration
680\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
681\tmore text
682
683test
684""")
685
686 def test_another_long_almost_unsplittable_header(self):
687 eq = self.ndiffAssertEqual
688 hstr = """\
689bug demonstration
690\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
691\tmore text"""
692 h = Header(hstr, continuation_ws='\t')
693 eq(h.encode(), """\
694bug demonstration
695\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
696\tmore text""")
697 h = Header(hstr.replace('\t', ' '))
698 eq(h.encode(), """\
699bug demonstration
700 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
701 more text""")
702
703 def test_long_nonstring(self):
704 eq = self.ndiffAssertEqual
705 g = Charset("iso-8859-1")
706 cz = Charset("iso-8859-2")
707 utf8 = Charset("utf-8")
708 g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
709 b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
710 b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
711 b'bef\xf6rdert. ')
712 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
713 b'd\xf9vtipu.. ')
714 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
715 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
716 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
717 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
718 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
719 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
720 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
721 '\u3044\u307e\u3059\u3002')
722 h = Header(g_head, g, header_name='Subject')
723 h.append(cz_head, cz)
724 h.append(utf8_head, utf8)
725 msg = Message()
726 msg['Subject'] = h
727 sfp = StringIO()
728 g = Generator(sfp)
729 g.flatten(msg)
730 eq(sfp.getvalue(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000731Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
732 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
733 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
734 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
735 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
736 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
737 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
738 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
739 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
740 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
741 =?utf-8?b?44CC?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000742
743""")
Guido van Rossum9604e662007-08-30 03:46:43 +0000744 eq(h.encode(maxlinelen=76), """\
745=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
746 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
747 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
748 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
749 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
750 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
751 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
752 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
753 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
754 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
755 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000756
757 def test_long_header_encode(self):
758 eq = self.ndiffAssertEqual
759 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
760 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
761 header_name='X-Foobar-Spoink-Defrobnit')
762 eq(h.encode(), '''\
763wasnipoop; giraffes="very-long-necked-animals";
764 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
765
766 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
767 eq = self.ndiffAssertEqual
768 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
769 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
770 header_name='X-Foobar-Spoink-Defrobnit',
771 continuation_ws='\t')
772 eq(h.encode(), '''\
773wasnipoop; giraffes="very-long-necked-animals";
774 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
775
776 def test_long_header_encode_with_tab_continuation(self):
777 eq = self.ndiffAssertEqual
778 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
779 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
780 header_name='X-Foobar-Spoink-Defrobnit',
781 continuation_ws='\t')
782 eq(h.encode(), '''\
783wasnipoop; giraffes="very-long-necked-animals";
784\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
785
R David Murray3a6152f2011-03-14 21:13:03 -0400786 def test_header_encode_with_different_output_charset(self):
787 h = Header('文', 'euc-jp')
788 self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=")
789
790 def test_long_header_encode_with_different_output_charset(self):
791 h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4'
792 b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4'
793 b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4'
794 b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp')
795 res = """\
796=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?=
797 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?="""
798 self.assertEqual(h.encode(), res)
799
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000800 def test_header_splitter(self):
801 eq = self.ndiffAssertEqual
802 msg = MIMEText('')
803 # It'd be great if we could use add_header() here, but that doesn't
804 # guarantee an order of the parameters.
805 msg['X-Foobar-Spoink-Defrobnit'] = (
806 'wasnipoop; giraffes="very-long-necked-animals"; '
807 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
808 sfp = StringIO()
809 g = Generator(sfp)
810 g.flatten(msg)
811 eq(sfp.getvalue(), '''\
812Content-Type: text/plain; charset="us-ascii"
813MIME-Version: 1.0
814Content-Transfer-Encoding: 7bit
815X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
816 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
817
818''')
819
820 def test_no_semis_header_splitter(self):
821 eq = self.ndiffAssertEqual
822 msg = Message()
823 msg['From'] = 'test@dom.ain'
824 msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
825 msg.set_payload('Test')
826 sfp = StringIO()
827 g = Generator(sfp)
828 g.flatten(msg)
829 eq(sfp.getvalue(), """\
830From: test@dom.ain
831References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
832 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>
833
834Test""")
835
R David Murray7da4db12011-04-07 20:37:17 -0400836 def test_last_split_chunk_does_not_fit(self):
837 eq = self.ndiffAssertEqual
838 h = Header('Subject: the first part of this is short, but_the_second'
839 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
840 '_all_by_itself')
841 eq(h.encode(), """\
842Subject: the first part of this is short,
843 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
844
845 def test_splittable_leading_char_followed_by_overlong_unsplitable(self):
846 eq = self.ndiffAssertEqual
847 h = Header(', but_the_second'
848 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
849 '_all_by_itself')
850 eq(h.encode(), """\
851,
852 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
853
854 def test_multiple_splittable_leading_char_followed_by_overlong_unsplitable(self):
855 eq = self.ndiffAssertEqual
856 h = Header(', , but_the_second'
857 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
858 '_all_by_itself')
859 eq(h.encode(), """\
860, ,
861 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
862
863 def test_trailing_splitable_on_overlong_unsplitable(self):
864 eq = self.ndiffAssertEqual
865 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
866 'be_on_a_line_all_by_itself;')
867 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_should_"
868 "be_on_a_line_all_by_itself;")
869
870 def test_trailing_splitable_on_overlong_unsplitable_with_leading_splitable(self):
871 eq = self.ndiffAssertEqual
872 h = Header('; '
873 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
R David Murray01581ee2011-04-18 10:04:34 -0400874 'be_on_a_line_all_by_itself; ')
R David Murray7da4db12011-04-07 20:37:17 -0400875 eq(h.encode(), """\
876;
R David Murray01581ee2011-04-18 10:04:34 -0400877 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
R David Murray7da4db12011-04-07 20:37:17 -0400878
def test_long_header_with_multiple_sequential_split_chars(self):
    # Folding a long value must keep all of its text; per the literal
    # below, this input "used to cause truncation of the header when
    # folded".
    # NOTE(review): the value says it "has two whitespaces in a row", yet
    # only single spaces are visible in this copy of the file -- confirm
    # the whitespace runs in both literals against the canonical source.
    eq = self.ndiffAssertEqual
    h = Header('This is a long line that has two whitespaces in a row. '
        'This used to cause truncation of the header when folded')
    eq(h.encode(), """\
This is a long line that has two whitespaces in a row. This used to cause
 truncation of the header when folded""")
886
R David Murray01581ee2011-04-18 10:04:34 -0400887 def test_splitter_split_on_punctuation_only_if_fws(self):
888 eq = self.ndiffAssertEqual
889 h = Header('thisverylongheaderhas;semicolons;and,commas,but'
890 'they;arenotlegal;fold,points')
891 eq(h.encode(), "thisverylongheaderhas;semicolons;and,commas,butthey;"
892 "arenotlegal;fold,points")
893
894 def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self):
895 eq = self.ndiffAssertEqual
896 h = Header('this is a test where we need to have more than one line '
897 'before; our final line that is just too big to fit;; '
898 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
899 'be_on_a_line_all_by_itself;')
900 eq(h.encode(), """\
901this is a test where we need to have more than one line before;
902 our final line that is just too big to fit;;
903 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""")
904
905 def test_overlong_last_part_followed_by_split_point(self):
906 eq = self.ndiffAssertEqual
907 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
908 'be_on_a_line_all_by_itself ')
909 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_"
910 "should_be_on_a_line_all_by_itself ")
911
912 def test_multiline_with_overlong_parts_separated_by_two_split_points(self):
913 eq = self.ndiffAssertEqual
914 h = Header('this_is_a__test_where_we_need_to_have_more_than_one_line_'
915 'before_our_final_line_; ; '
916 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
917 'be_on_a_line_all_by_itself; ')
918 eq(h.encode(), """\
919this_is_a__test_where_we_need_to_have_more_than_one_line_before_our_final_line_;
920 ;
921 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
922
923 def test_multiline_with_overlong_last_part_followed_by_split_point(self):
924 eq = self.ndiffAssertEqual
925 h = Header('this is a test where we need to have more than one line '
926 'before our final line; ; '
927 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
928 'be_on_a_line_all_by_itself; ')
929 eq(h.encode(), """\
930this is a test where we need to have more than one line before our final line;
931 ;
932 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
933
def test_long_header_with_whitespace_runs(self):
    # Flattens a message whose References value is ten message-ids joined
    # with SPACE and compares the folded output with a fixed text.
    # NOTE(review): as shown here each joined element ends in a single
    # space, while the expected text ends in two (\x20\x20); the
    # whitespace runs in both literals look collapsed in this copy of the
    # file -- confirm them against the canonical source.
    eq = self.ndiffAssertEqual
    msg = Message()
    msg['From'] = 'test@dom.ain'
    msg['References'] = SPACE.join(['<foo@dom.ain> '] * 10)
    msg.set_payload('Test')
    sfp = StringIO()
    g = Generator(sfp)
    g.flatten(msg)
    eq(sfp.getvalue(), """\
From: test@dom.ain
References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
 <foo@dom.ain> <foo@dom.ain>\x20\x20

Test""")
950
951 def test_long_run_with_semi_header_splitter(self):
952 eq = self.ndiffAssertEqual
953 msg = Message()
954 msg['From'] = 'test@dom.ain'
955 msg['References'] = SPACE.join(['<foo@dom.ain>'] * 10) + '; abc'
956 msg.set_payload('Test')
957 sfp = StringIO()
958 g = Generator(sfp)
959 g.flatten(msg)
960 eq(sfp.getvalue(), """\
961From: test@dom.ain
962References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
963 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
964 <foo@dom.ain>; abc
965
966Test""")
967
968 def test_splitter_split_on_punctuation_only_if_fws(self):
969 eq = self.ndiffAssertEqual
970 msg = Message()
971 msg['From'] = 'test@dom.ain'
972 msg['References'] = ('thisverylongheaderhas;semicolons;and,commas,but'
973 'they;arenotlegal;fold,points')
974 msg.set_payload('Test')
975 sfp = StringIO()
976 g = Generator(sfp)
977 g.flatten(msg)
978 # XXX the space after the header should not be there.
979 eq(sfp.getvalue(), """\
980From: test@dom.ain
981References:\x20
982 thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points
983
984Test""")
985
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000986 def test_no_split_long_header(self):
987 eq = self.ndiffAssertEqual
988 hstr = 'References: ' + 'x' * 80
Guido van Rossum9604e662007-08-30 03:46:43 +0000989 h = Header(hstr)
990 # These come on two lines because Headers are really field value
991 # classes and don't really know about their field names.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000992 eq(h.encode(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000993References:
994 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
995 h = Header('x' * 80)
996 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000997
998 def test_splitting_multiple_long_lines(self):
999 eq = self.ndiffAssertEqual
1000 hstr = """\
1001from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1002\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1003\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1004"""
1005 h = Header(hstr, continuation_ws='\t')
1006 eq(h.encode(), """\
1007from babylon.socal-raves.org (localhost [127.0.0.1]);
1008 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1009 for <mailman-admin@babylon.socal-raves.org>;
1010 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1011\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1012 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1013 for <mailman-admin@babylon.socal-raves.org>;
1014 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1015\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1016 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1017 for <mailman-admin@babylon.socal-raves.org>;
1018 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")
1019
1020 def test_splitting_first_line_only_is_long(self):
1021 eq = self.ndiffAssertEqual
1022 hstr = """\
1023from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
1024\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1025\tid 17k4h5-00034i-00
1026\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
1027 h = Header(hstr, maxlinelen=78, header_name='Received',
1028 continuation_ws='\t')
1029 eq(h.encode(), """\
1030from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
1031 helo=cthulhu.gerg.ca)
1032\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1033\tid 17k4h5-00034i-00
1034\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")
1035
1036 def test_long_8bit_header(self):
1037 eq = self.ndiffAssertEqual
1038 msg = Message()
1039 h = Header('Britische Regierung gibt', 'iso-8859-1',
1040 header_name='Subject')
1041 h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
Guido van Rossum9604e662007-08-30 03:46:43 +00001042 eq(h.encode(maxlinelen=76), """\
1043=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1044 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001045 msg['Subject'] = h
Guido van Rossum9604e662007-08-30 03:46:43 +00001046 eq(msg.as_string(maxheaderlen=76), """\
1047Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1048 =?iso-8859-1?q?hore-Windkraftprojekte?=
1049
1050""")
1051 eq(msg.as_string(maxheaderlen=0), """\
1052Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001053
1054""")
1055
def test_long_8bit_header_no_charset(self):
    # Part 1: a non-ASCII str assigned directly as a header value is
    # expected to make as_string() raise UnicodeEncodeError.
    # Part 2: the same text wrapped in a utf-8 Header flattens to two
    # utf-8 encoded words.
    # NOTE(review): part 1 pins how the library under test treats a
    # charset-less non-ASCII value -- confirm it still matches the email
    # package version this file is run against.
    eq = self.ndiffAssertEqual
    msg = Message()
    header_string = ('Britische Regierung gibt gr\xfcnes Licht '
                     'f\xfcr Offshore-Windkraftprojekte '
                     '<a-very-long-address@example.com>')
    msg['Reply-To'] = header_string
    self.assertRaises(UnicodeEncodeError, msg.as_string)
    msg = Message()
    msg['Reply-To'] = Header(header_string, 'utf-8',
                             header_name='Reply-To')
    eq(msg.as_string(maxheaderlen=78), """\
Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=

""")
1072
1073 def test_long_to_header(self):
1074 eq = self.ndiffAssertEqual
1075 to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
R David Murray01581ee2011-04-18 10:04:34 -04001076 '<someone@eecs.umich.edu>, '
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001077 '"Someone Test #B" <someone@umich.edu>, '
1078 '"Someone Test #C" <someone@eecs.umich.edu>, '
1079 '"Someone Test #D" <someone@eecs.umich.edu>')
1080 msg = Message()
1081 msg['To'] = to
1082 eq(msg.as_string(maxheaderlen=78), '''\
Guido van Rossum9604e662007-08-30 03:46:43 +00001083To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
Barry Warsaw70d61ce2009-03-30 23:12:30 +00001084 "Someone Test #B" <someone@umich.edu>,
Guido van Rossum9604e662007-08-30 03:46:43 +00001085 "Someone Test #C" <someone@eecs.umich.edu>,
1086 "Someone Test #D" <someone@eecs.umich.edu>
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001087
1088''')
1089
1090 def test_long_line_after_append(self):
1091 eq = self.ndiffAssertEqual
1092 s = 'This is an example of string which has almost the limit of header length.'
1093 h = Header(s)
1094 h.append('Add another line.')
Guido van Rossum9604e662007-08-30 03:46:43 +00001095 eq(h.encode(maxlinelen=76), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001096This is an example of string which has almost the limit of header length.
1097 Add another line.""")
1098
1099 def test_shorter_line_with_append(self):
1100 eq = self.ndiffAssertEqual
1101 s = 'This is a shorter line.'
1102 h = Header(s)
1103 h.append('Add another sentence. (Surprise?)')
1104 eq(h.encode(),
1105 'This is a shorter line. Add another sentence. (Surprise?)')
1106
1107 def test_long_field_name(self):
1108 eq = self.ndiffAssertEqual
1109 fn = 'X-Very-Very-Very-Long-Header-Name'
Guido van Rossum9604e662007-08-30 03:46:43 +00001110 gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
1111 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
1112 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
1113 'bef\xf6rdert. ')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001114 h = Header(gs, 'iso-8859-1', header_name=fn)
1115 # BAW: this seems broken because the first line is too long
Guido van Rossum9604e662007-08-30 03:46:43 +00001116 eq(h.encode(maxlinelen=76), """\
1117=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
1118 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
1119 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
1120 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001121
1122 def test_long_received_header(self):
1123 h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
1124 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
1125 'Wed, 05 Mar 2003 18:10:18 -0700')
1126 msg = Message()
1127 msg['Received-1'] = Header(h, continuation_ws='\t')
1128 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +00001129 # This should be splitting on spaces not semicolons.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001130 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001131Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1132 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
Barry Warsawbef9d212007-08-31 10:55:37 +00001133 Wed, 05 Mar 2003 18:10:18 -0700
R David Murray01581ee2011-04-18 10:04:34 -04001134Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1135 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
Barry Warsawbef9d212007-08-31 10:55:37 +00001136 Wed, 05 Mar 2003 18:10:18 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001137
1138""")
1139
1140 def test_string_headerinst_eq(self):
1141 h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
1142 'tu-muenchen.de> (David Bremner\'s message of '
1143 '"Thu, 6 Mar 2003 13:58:21 +0100")')
1144 msg = Message()
1145 msg['Received-1'] = Header(h, header_name='Received-1',
1146 continuation_ws='\t')
1147 msg['Received-2'] = h
R David Murray01581ee2011-04-18 10:04:34 -04001148 # XXX The space after the ':' should not be there.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001149 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001150Received-1:\x20
1151 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1152 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
1153Received-2:\x20
1154 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1155 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001156
1157""")
1158
1159 def test_long_unbreakable_lines_with_continuation(self):
1160 eq = self.ndiffAssertEqual
1161 msg = Message()
1162 t = """\
1163iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1164 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
1165 msg['Face-1'] = t
1166 msg['Face-2'] = Header(t, header_name='Face-2')
R David Murray01581ee2011-04-18 10:04:34 -04001167 msg['Face-3'] = ' ' + t
Barry Warsawbef9d212007-08-31 10:55:37 +00001168 # XXX This splitting is all wrong. It the first value line should be
R David Murray01581ee2011-04-18 10:04:34 -04001169 # snug against the field name or the space after the header not there.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001170 eq(msg.as_string(maxheaderlen=78), """\
Barry Warsawc5a6a302007-08-31 11:19:21 +00001171Face-1:\x20
Barry Warsaw70d61ce2009-03-30 23:12:30 +00001172 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001173 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Barry Warsawc5a6a302007-08-31 11:19:21 +00001174Face-2:\x20
Barry Warsawbef9d212007-08-31 10:55:37 +00001175 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001176 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
R David Murray01581ee2011-04-18 10:04:34 -04001177Face-3:\x20
1178 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1179 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001180
1181""")
1182
1183 def test_another_long_multiline_header(self):
1184 eq = self.ndiffAssertEqual
1185 m = ('Received: from siimage.com '
1186 '([172.25.1.3]) by zima.siliconimage.com with '
Guido van Rossum9604e662007-08-30 03:46:43 +00001187 'Microsoft SMTPSVC(5.0.2195.4905); '
1188 'Wed, 16 Oct 2002 07:41:11 -0700')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001189 msg = email.message_from_string(m)
1190 eq(msg.as_string(maxheaderlen=78), '''\
R David Murray01581ee2011-04-18 10:04:34 -04001191Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with
1192 Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 2002 07:41:11 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001193
1194''')
1195
def test_long_lines_with_different_header(self):
    # The value is stored twice under 'List', once as a str and once as a
    # Header; both entries must fold the same way.
    value = ('List-Unsubscribe: '
             '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
             '        <mailto:spamassassin-talk-request@lists.sourceforge.net'
             '?subject=unsubscribe>')
    message = Message()
    message['List'] = value
    message['List'] = Header(value, header_name='List')
    expected = """\
List: List-Unsubscribe:
 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
        <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
List: List-Unsubscribe:
 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
        <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>

"""
    self.ndiffAssertEqual(message.as_string(maxheaderlen=78), expected)
1214
def test_long_rfc2047_header_with_embedded_fws(self):
    # Encodes a utf-8 Header whose value already contains folded lines and
    # checks the resulting sequence of encoded words.
    # NOTE(review): both literals go through textwrap.dedent(), so the
    # relative indentation of their lines is significant, but it cannot be
    # recovered from this copy of the file.  The layout below is assumed --
    # confirm it against the canonical source.
    h = Header(textwrap.dedent("""\
        We're going to pretend this header is in a non-ascii character set
        \tto see if line wrapping with encoded words and embedded
           folding white space works"""),
               charset='utf-8',
               header_name='Test')
    self.assertEqual(h.encode()+'\n', textwrap.dedent("""\
        =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
         =?utf-8?q?cter_set?=
         =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
         =?utf-8?q?_folding_white_space_works?=""")+'\n')
1227
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001228
Ezio Melottib3aedd42010-11-20 19:04:17 +00001229
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001230# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    """Generator output for a body whose first line begins with 'From '."""

    def setUp(self):
        body = """\
From the desk of A.A.A.:
Blah blah blah
"""
        message = Message()
        message['From'] = 'aaa@bbb.org'
        message.set_payload(body)
        self.msg = message

    def test_mangled_from(self):
        # mangle_from_=True prefixes the 'From ' body line with '>'.
        buffer = StringIO()
        Generator(buffer, mangle_from_=True).flatten(self.msg)
        expected = """\
From: aaa@bbb.org

>From the desk of A.A.A.:
Blah blah blah
"""
        self.assertEqual(buffer.getvalue(), expected)

    def test_dont_mangle_from(self):
        # mangle_from_=False leaves the body untouched.
        buffer = StringIO()
        Generator(buffer, mangle_from_=False).flatten(self.msg)
        expected = """\
From: aaa@bbb.org

From the desk of A.A.A.:
Blah blah blah
"""
        self.assertEqual(buffer.getvalue(), expected)
1261
1262
Ezio Melottib3aedd42010-11-20 19:04:17 +00001263
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001264# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    """Basic checks of MIMEAudio built from the audiotest.au sample."""

    def setUp(self):
        # Use the audiotest.au that lives in email/test/data.  Python also
        # ships one in Lib/test, but some binary distributions leave the
        # test package out.  The trailing empty path component matters
        # because findfile() will do a dirname() on what it is given.
        data_dir = os.path.join(os.path.dirname(landmark), 'data', '')
        audio_path = findfile('audiotest.au', data_dir)
        with open(audio_path, 'rb') as stream:
            raw = stream.read()
        self._audiodata = raw
        self._au = MIMEAudio(raw)

    def test_guess_minor_type(self):
        self.assertEqual(self._au.get_content_type(), 'audio/basic')

    def test_encoding(self):
        # The payload is base64 text that decodes back to the input bytes.
        encoded = bytes(self._au.get_payload(), 'ascii')
        self.assertEqual(base64.decodebytes(encoded), self._audiodata)

    def test_checkSetMinor(self):
        audio = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(audio.get_content_type(), 'audio/fish')

    def test_add_header(self):
        audio = self._au
        audio.add_header('Content-Disposition', 'attachment',
                         filename='audiotest.au')
        self.assertEqual(audio['content-disposition'],
                         'attachment; filename="audiotest.au"')
        self.assertEqual(audio.get_params(header='content-disposition'),
                         [('attachment', ''), ('filename', 'audiotest.au')])
        self.assertEqual(
            audio.get_param('filename', header='content-disposition'),
            'audiotest.au')
        # A fresh list serves as an identity-checked sentinel.
        missing = []
        self.assertEqual(
            audio.get_param('attachment', header='content-disposition'), '')
        self.assertIs(
            audio.get_param('foo', failobj=missing,
                            header='content-disposition'),
            missing)
        # Try some missing stuff
        self.assertIs(audio.get_param('foobar', missing), missing)
        self.assertIs(
            audio.get_param('attachment', missing, header='foobar'),
            missing)
1308
1309
Ezio Melottib3aedd42010-11-20 19:04:17 +00001310
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001311# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    """Basic checks of MIMEImage built from the PyBanner048.gif sample."""

    def setUp(self):
        with openfile('PyBanner048.gif', 'rb') as stream:
            raw = stream.read()
        self._imgdata = raw
        self._im = MIMEImage(raw)

    def test_guess_minor_type(self):
        self.assertEqual(self._im.get_content_type(), 'image/gif')

    def test_encoding(self):
        # The payload is base64 text that decodes back to the input bytes.
        encoded = bytes(self._im.get_payload(), 'ascii')
        self.assertEqual(base64.decodebytes(encoded), self._imgdata)

    def test_checkSetMinor(self):
        image = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(image.get_content_type(), 'image/fish')

    def test_add_header(self):
        image = self._im
        image.add_header('Content-Disposition', 'attachment',
                         filename='dingusfish.gif')
        self.assertEqual(image['content-disposition'],
                         'attachment; filename="dingusfish.gif"')
        self.assertEqual(image.get_params(header='content-disposition'),
                         [('attachment', ''), ('filename', 'dingusfish.gif')])
        self.assertEqual(
            image.get_param('filename', header='content-disposition'),
            'dingusfish.gif')
        # A fresh list serves as an identity-checked sentinel.
        missing = []
        self.assertEqual(
            image.get_param('attachment', header='content-disposition'), '')
        self.assertIs(
            image.get_param('foo', failobj=missing,
                            header='content-disposition'),
            missing)
        # Try some missing stuff
        self.assertIs(image.get_param('foobar', missing), missing)
        self.assertIs(
            image.get_param('attachment', missing, header='foobar'),
            missing)
1349
1350
Ezio Melottib3aedd42010-11-20 19:04:17 +00001351
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001352# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    """Basic checks of MIMEApplication built from six non-ASCII bytes."""

    def test_headers(self):
        part = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        self.assertEqual(part.get_content_type(), 'application/octet-stream')
        self.assertEqual(part['content-transfer-encoding'], 'base64')

    def test_body(self):
        raw = b'\xfa\xfb\xfc\xfd\xfe\xff'
        part = MIMEApplication(raw)
        # whitespace in the cte encoded block is RFC-irrelevant.
        encoded_text = part.get_payload().strip()
        self.assertEqual(encoded_text, '+vv8/f7/')
        self.assertEqual(part.get_payload(decode=True), raw)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001367
1368
Ezio Melottib3aedd42010-11-20 19:04:17 +00001369
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001370# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    """Basic checks of MIMEText: content type, charset and payload."""

    def setUp(self):
        self._msg = MIMEText('hello there')

    def test_types(self):
        part = self._msg
        self.assertEqual(part.get_content_type(), 'text/plain')
        self.assertEqual(part.get_param('charset'), 'us-ascii')
        # A fresh list serves as an identity-checked sentinel.
        missing = []
        self.assertIs(part.get_param('foobar', missing), missing)
        self.assertIs(part.get_param('charset', missing, header='foobar'),
                      missing)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        part = MIMEText('hello there', _charset='us-ascii')
        self.assertEqual(part.get_charset().input_charset, 'us-ascii')
        self.assertEqual(part['content-type'],
                         'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        part = MIMEText('hello there', _charset='us-ascii')
        self.assertEqual(part.get_charset().input_charset, 'us-ascii')
        self.assertEqual(part['content-type'],
                         'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        part = MIMEText('hello there')
        self.assertEqual(part.get_charset(), 'us-ascii')
        self.assertEqual(part['content-type'],
                         'text/plain; charset="us-ascii"')
        self.assertIn('hello there', part.as_string())

    def test_utf8_input(self):
        cyrillic = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        part = MIMEText(cyrillic, _charset='utf-8')
        self.assertEqual(part.get_charset().output_charset, 'utf-8')
        self.assertEqual(part['content-type'],
                         'text/plain; charset="utf-8"')
        self.assertEqual(part.get_payload(decode=True),
                         cyrillic.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
        "will fix in email6")
    def test_utf8_input_no_charset(self):
        cyrillic = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, cyrillic)
1421
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001422
Ezio Melottib3aedd42010-11-20 19:04:17 +00001423
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001424# Test complicated multipart/* messages
1425class TestMultipart(TestEmailBase):
def setUp(self):
    # Build the fixture shared by the tests of this class: a
    # multipart/mixed container holding a short text introduction followed
    # by the PyBanner048.gif image, plus From/To/Subject/Date headers.
    # The container, image and text parts are kept on self as _msg, _im
    # and _txt.
    with openfile('PyBanner048.gif', 'rb') as fp:
        data = fp.read()
    container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
    image = MIMEImage(data, name='dingusfish.gif')
    image.add_header('content-disposition', 'attachment',
                     filename='dingusfish.gif')
    intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
    container.attach(intro)
    container.attach(image)
    container['From'] = 'Barry <barry@digicool.com>'
    container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
    container['Subject'] = 'Here is your dingus fish'

    now = 987809702.54848599
    timetuple = time.localtime(now)
    # The last struct_time field is the DST flag; 0 selects the
    # standard-time offset, anything else the alternate (DST) one.
    if timetuple[-1] == 0:
        tzsecs = time.timezone
    else:
        tzsecs = time.altzone
    # A positive offset is written with '-', any other with '+'.
    sign = '-' if tzsecs > 0 else '+'
    # Format the magnitude as HHMM.  The previous '%04d' % (tzsecs / 36)
    # emitted a second sign for negative offsets (e.g. ' +-100') and wrong
    # digits for offsets that are not a whole number of hours.
    hours, minutes = divmod(abs(tzsecs) // 60, 60)
    tzoffset = ' %s%02d%02d' % (sign, hours, minutes)
    container['Date'] = time.strftime(
        '%a, %d %b %Y %H:%M:%S', timetuple) + tzoffset
    self._msg = container
    self._im = image
    self._txt = intro
1461
1462 def test_hierarchy(self):
1463 # convenience
1464 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001465 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001466 raises = self.assertRaises
1467 # tests
1468 m = self._msg
1469 unless(m.is_multipart())
1470 eq(m.get_content_type(), 'multipart/mixed')
1471 eq(len(m.get_payload()), 2)
1472 raises(IndexError, m.get_payload, 2)
1473 m0 = m.get_payload(0)
1474 m1 = m.get_payload(1)
1475 unless(m0 is self._txt)
1476 unless(m1 is self._im)
1477 eq(m.get_payload(), [m0, m1])
1478 unless(not m0.is_multipart())
1479 unless(not m1.is_multipart())
1480
1481 def test_empty_multipart_idempotent(self):
1482 text = """\
1483Content-Type: multipart/mixed; boundary="BOUNDARY"
1484MIME-Version: 1.0
1485Subject: A subject
1486To: aperson@dom.ain
1487From: bperson@dom.ain
1488
1489
1490--BOUNDARY
1491
1492
1493--BOUNDARY--
1494"""
1495 msg = Parser().parsestr(text)
1496 self.ndiffAssertEqual(text, msg.as_string())
1497
1498 def test_no_parts_in_a_multipart_with_none_epilogue(self):
1499 outer = MIMEBase('multipart', 'mixed')
1500 outer['Subject'] = 'A subject'
1501 outer['To'] = 'aperson@dom.ain'
1502 outer['From'] = 'bperson@dom.ain'
1503 outer.set_boundary('BOUNDARY')
1504 self.ndiffAssertEqual(outer.as_string(), '''\
1505Content-Type: multipart/mixed; boundary="BOUNDARY"
1506MIME-Version: 1.0
1507Subject: A subject
1508To: aperson@dom.ain
1509From: bperson@dom.ain
1510
1511--BOUNDARY
1512
1513--BOUNDARY--''')
1514
1515 def test_no_parts_in_a_multipart_with_empty_epilogue(self):
1516 outer = MIMEBase('multipart', 'mixed')
1517 outer['Subject'] = 'A subject'
1518 outer['To'] = 'aperson@dom.ain'
1519 outer['From'] = 'bperson@dom.ain'
1520 outer.preamble = ''
1521 outer.epilogue = ''
1522 outer.set_boundary('BOUNDARY')
1523 self.ndiffAssertEqual(outer.as_string(), '''\
1524Content-Type: multipart/mixed; boundary="BOUNDARY"
1525MIME-Version: 1.0
1526Subject: A subject
1527To: aperson@dom.ain
1528From: bperson@dom.ain
1529
1530
1531--BOUNDARY
1532
1533--BOUNDARY--
1534''')
1535
1536 def test_one_part_in_a_multipart(self):
1537 eq = self.ndiffAssertEqual
1538 outer = MIMEBase('multipart', 'mixed')
1539 outer['Subject'] = 'A subject'
1540 outer['To'] = 'aperson@dom.ain'
1541 outer['From'] = 'bperson@dom.ain'
1542 outer.set_boundary('BOUNDARY')
1543 msg = MIMEText('hello world')
1544 outer.attach(msg)
1545 eq(outer.as_string(), '''\
1546Content-Type: multipart/mixed; boundary="BOUNDARY"
1547MIME-Version: 1.0
1548Subject: A subject
1549To: aperson@dom.ain
1550From: bperson@dom.ain
1551
1552--BOUNDARY
1553Content-Type: text/plain; charset="us-ascii"
1554MIME-Version: 1.0
1555Content-Transfer-Encoding: 7bit
1556
1557hello world
1558--BOUNDARY--''')
1559
1560 def test_seq_parts_in_a_multipart_with_empty_preamble(self):
1561 eq = self.ndiffAssertEqual
1562 outer = MIMEBase('multipart', 'mixed')
1563 outer['Subject'] = 'A subject'
1564 outer['To'] = 'aperson@dom.ain'
1565 outer['From'] = 'bperson@dom.ain'
1566 outer.preamble = ''
1567 msg = MIMEText('hello world')
1568 outer.attach(msg)
1569 outer.set_boundary('BOUNDARY')
1570 eq(outer.as_string(), '''\
1571Content-Type: multipart/mixed; boundary="BOUNDARY"
1572MIME-Version: 1.0
1573Subject: A subject
1574To: aperson@dom.ain
1575From: bperson@dom.ain
1576
1577
1578--BOUNDARY
1579Content-Type: text/plain; charset="us-ascii"
1580MIME-Version: 1.0
1581Content-Transfer-Encoding: 7bit
1582
1583hello world
1584--BOUNDARY--''')
1585
1586
1587 def test_seq_parts_in_a_multipart_with_none_preamble(self):
1588 eq = self.ndiffAssertEqual
1589 outer = MIMEBase('multipart', 'mixed')
1590 outer['Subject'] = 'A subject'
1591 outer['To'] = 'aperson@dom.ain'
1592 outer['From'] = 'bperson@dom.ain'
1593 outer.preamble = None
1594 msg = MIMEText('hello world')
1595 outer.attach(msg)
1596 outer.set_boundary('BOUNDARY')
1597 eq(outer.as_string(), '''\
1598Content-Type: multipart/mixed; boundary="BOUNDARY"
1599MIME-Version: 1.0
1600Subject: A subject
1601To: aperson@dom.ain
1602From: bperson@dom.ain
1603
1604--BOUNDARY
1605Content-Type: text/plain; charset="us-ascii"
1606MIME-Version: 1.0
1607Content-Transfer-Encoding: 7bit
1608
1609hello world
1610--BOUNDARY--''')
1611
1612
1613 def test_seq_parts_in_a_multipart_with_none_epilogue(self):
1614 eq = self.ndiffAssertEqual
1615 outer = MIMEBase('multipart', 'mixed')
1616 outer['Subject'] = 'A subject'
1617 outer['To'] = 'aperson@dom.ain'
1618 outer['From'] = 'bperson@dom.ain'
1619 outer.epilogue = None
1620 msg = MIMEText('hello world')
1621 outer.attach(msg)
1622 outer.set_boundary('BOUNDARY')
1623 eq(outer.as_string(), '''\
1624Content-Type: multipart/mixed; boundary="BOUNDARY"
1625MIME-Version: 1.0
1626Subject: A subject
1627To: aperson@dom.ain
1628From: bperson@dom.ain
1629
1630--BOUNDARY
1631Content-Type: text/plain; charset="us-ascii"
1632MIME-Version: 1.0
1633Content-Transfer-Encoding: 7bit
1634
1635hello world
1636--BOUNDARY--''')
1637
1638
1639 def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
1640 eq = self.ndiffAssertEqual
1641 outer = MIMEBase('multipart', 'mixed')
1642 outer['Subject'] = 'A subject'
1643 outer['To'] = 'aperson@dom.ain'
1644 outer['From'] = 'bperson@dom.ain'
1645 outer.epilogue = ''
1646 msg = MIMEText('hello world')
1647 outer.attach(msg)
1648 outer.set_boundary('BOUNDARY')
1649 eq(outer.as_string(), '''\
1650Content-Type: multipart/mixed; boundary="BOUNDARY"
1651MIME-Version: 1.0
1652Subject: A subject
1653To: aperson@dom.ain
1654From: bperson@dom.ain
1655
1656--BOUNDARY
1657Content-Type: text/plain; charset="us-ascii"
1658MIME-Version: 1.0
1659Content-Transfer-Encoding: 7bit
1660
1661hello world
1662--BOUNDARY--
1663''')
1664
1665
1666 def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
1667 eq = self.ndiffAssertEqual
1668 outer = MIMEBase('multipart', 'mixed')
1669 outer['Subject'] = 'A subject'
1670 outer['To'] = 'aperson@dom.ain'
1671 outer['From'] = 'bperson@dom.ain'
1672 outer.epilogue = '\n'
1673 msg = MIMEText('hello world')
1674 outer.attach(msg)
1675 outer.set_boundary('BOUNDARY')
1676 eq(outer.as_string(), '''\
1677Content-Type: multipart/mixed; boundary="BOUNDARY"
1678MIME-Version: 1.0
1679Subject: A subject
1680To: aperson@dom.ain
1681From: bperson@dom.ain
1682
1683--BOUNDARY
1684Content-Type: text/plain; charset="us-ascii"
1685MIME-Version: 1.0
1686Content-Transfer-Encoding: 7bit
1687
1688hello world
1689--BOUNDARY--
1690
1691''')
1692
1693 def test_message_external_body(self):
1694 eq = self.assertEqual
1695 msg = self._msgobj('msg_36.txt')
1696 eq(len(msg.get_payload()), 2)
1697 msg1 = msg.get_payload(1)
1698 eq(msg1.get_content_type(), 'multipart/alternative')
1699 eq(len(msg1.get_payload()), 2)
1700 for subpart in msg1.get_payload():
1701 eq(subpart.get_content_type(), 'message/external-body')
1702 eq(len(subpart.get_payload()), 1)
1703 subsubpart = subpart.get_payload(0)
1704 eq(subsubpart.get_content_type(), 'text/plain')
1705
1706 def test_double_boundary(self):
1707 # msg_37.txt is a multipart that contains two dash-boundary's in a
1708 # row. Our interpretation of RFC 2046 calls for ignoring the second
1709 # and subsequent boundaries.
1710 msg = self._msgobj('msg_37.txt')
1711 self.assertEqual(len(msg.get_payload()), 3)
1712
1713 def test_nested_inner_contains_outer_boundary(self):
1714 eq = self.ndiffAssertEqual
1715 # msg_38.txt has an inner part that contains outer boundaries. My
1716 # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
1717 # these are illegal and should be interpreted as unterminated inner
1718 # parts.
1719 msg = self._msgobj('msg_38.txt')
1720 sfp = StringIO()
1721 iterators._structure(msg, sfp)
1722 eq(sfp.getvalue(), """\
1723multipart/mixed
1724 multipart/mixed
1725 multipart/alternative
1726 text/plain
1727 text/plain
1728 text/plain
1729 text/plain
1730""")
1731
1732 def test_nested_with_same_boundary(self):
1733 eq = self.ndiffAssertEqual
1734 # msg 39.txt is similarly evil in that it's got inner parts that use
1735 # the same boundary as outer parts. Again, I believe the way this is
1736 # parsed is closest to the spirit of RFC 2046
1737 msg = self._msgobj('msg_39.txt')
1738 sfp = StringIO()
1739 iterators._structure(msg, sfp)
1740 eq(sfp.getvalue(), """\
1741multipart/mixed
1742 multipart/mixed
1743 multipart/alternative
1744 application/octet-stream
1745 application/octet-stream
1746 text/plain
1747""")
1748
1749 def test_boundary_in_non_multipart(self):
1750 msg = self._msgobj('msg_40.txt')
1751 self.assertEqual(msg.as_string(), '''\
1752MIME-Version: 1.0
1753Content-Type: text/html; boundary="--961284236552522269"
1754
1755----961284236552522269
1756Content-Type: text/html;
1757Content-Transfer-Encoding: 7Bit
1758
1759<html></html>
1760
1761----961284236552522269--
1762''')
1763
1764 def test_boundary_with_leading_space(self):
1765 eq = self.assertEqual
1766 msg = email.message_from_string('''\
1767MIME-Version: 1.0
1768Content-Type: multipart/mixed; boundary=" XXXX"
1769
1770-- XXXX
1771Content-Type: text/plain
1772
1773
1774-- XXXX
1775Content-Type: text/plain
1776
1777-- XXXX--
1778''')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001779 self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001780 eq(msg.get_boundary(), ' XXXX')
1781 eq(len(msg.get_payload()), 2)
1782
1783 def test_boundary_without_trailing_newline(self):
1784 m = Parser().parsestr("""\
1785Content-Type: multipart/mixed; boundary="===============0012394164=="
1786MIME-Version: 1.0
1787
1788--===============0012394164==
1789Content-Type: image/file1.jpg
1790MIME-Version: 1.0
1791Content-Transfer-Encoding: base64
1792
1793YXNkZg==
1794--===============0012394164==--""")
Ezio Melottib3aedd42010-11-20 19:04:17 +00001795 self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001796
1797
Ezio Melottib3aedd42010-11-20 19:04:17 +00001798
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001799# Test some badly formatted messages
class TestNonConformant(TestEmailBase):
    """Parsing of badly formatted messages and the defects it records."""

    def test_parse_missing_minor_type(self):
        # msg_14.txt must be reported as text/plain.
        eq = self.assertEqual
        msg = self._msgobj('msg_14.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')

    def test_same_boundary_inner_outer(self):
        msg = self._msgobj('msg_15.txt')
        # XXX We can probably eventually do better
        inner = msg.get_payload(0)
        self.assertTrue(hasattr(inner, 'defects'))
        self.assertEqual(len(inner.defects), 1)
        self.assertIsInstance(inner.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    def test_multipart_no_boundary(self):
        # msg_25.txt: the payload stays a plain string and two defects are
        # recorded, in this order.
        msg = self._msgobj('msg_25.txt')
        self.assertIsInstance(msg.get_payload(), str)
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    def test_invalid_content_type(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        msg = Message()
        # RFC 2045, $5.2 says invalid yields text/plain
        msg['Content-Type'] = 'text'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Clear the old value and try something /really/ invalid
        del msg['content-type']
        msg['Content-Type'] = 'foo'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Still, make sure that the message is idempotently generated
        s = StringIO()
        g = Generator(s)
        g.flatten(msg)
        neq(s.getvalue(), 'Content-Type: foo\n\n')

    def test_no_start_boundary(self):
        # msg_31.txt: the payload must be this raw text, boundary lines
        # included.
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_31.txt')
        eq(msg.get_payload(), """\
--BOUNDARY
Content-Type: text/plain

message 1

--BOUNDARY
Content-Type: text/plain

message 2

--BOUNDARY--
""")

    def test_no_separating_blank_line(self):
        # msg_35.txt: the flattened form has a blank line between the
        # headers and the body text.
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_35.txt')
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: here's something interesting

counter to RFC 2822, there's no separating newline here
""")

    def test_lying_multipart(self):
        # msg_41.txt: two defects are recorded, in this order.
        msg = self._msgobj('msg_41.txt')
        self.assertTrue(hasattr(msg, 'defects'))
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    def test_missing_start_boundary(self):
        outer = self._msgobj('msg_42.txt')
        # The message structure is:
        #
        # multipart/mixed
        #    text/plain
        #    message/rfc822
        #        multipart/mixed [*]
        #
        # [*] This message is missing its start boundary
        bad = outer.get_payload(1).get_payload(0)
        self.assertEqual(len(bad.defects), 1)
        self.assertIsInstance(bad.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    def test_first_line_is_continuation_header(self):
        # Input whose first line starts with whitespace: no headers are
        # parsed, the remaining lines become the payload, and the offending
        # line is kept on the recorded defect.
        eq = self.assertEqual
        m = ' Line 1\nLine 2\nLine 3'
        msg = email.message_from_string(m)
        eq(msg.keys(), [])
        eq(msg.get_payload(), 'Line 2\nLine 3')
        eq(len(msg.defects), 1)
        self.assertIsInstance(msg.defects[0],
                              errors.FirstHeaderLineIsContinuationDefect)
        eq(msg.defects[0].line, ' Line 1\n')
1910
1911
Ezio Melottib3aedd42010-11-20 19:04:17 +00001912
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001913# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    """decode_header() / make_header() behaviour on RFC 2047 encoded words."""

    def test_rfc2047_multiline(self):
        # Two mac-iceland encoded words around a folded line: decode,
        # rebuild with make_header(), then re-encode at 76 columns.
        folded = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
        chunks = decode_header(folded)
        encoded_word = (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')
        self.assertEqual(chunks, [
            (b'Re:', None),
            encoded_word,
            (b'baz foo bar', None),
            encoded_word])
        rebuilt = make_header(chunks)
        self.assertEqual(
            str(rebuilt),
            'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        self.ndiffAssertEqual(rebuilt.encode(maxlinelen=76), """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""")

    def test_whitespace_eater_unicode(self):
        # One encoded word followed by plain text.
        raw = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        chunks = decode_header(raw)
        self.assertEqual(chunks, [(b'Andr\xe9', 'iso-8859-1'),
                                  (b'Pirard <pirard@dom.ain>', None)])
        rendered = str(make_header(chunks))
        self.assertEqual(rendered, 'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_eater_unicode_2(self):
        # Plain text and base64 encoded words alternating.
        raw = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?='
        chunks = decode_header(raw)
        self.assertEqual(chunks, [
            (b'The', None), (b'quick brown fox', 'iso-8859-1'),
            (b'jumped over the', None), (b'lazy dog', 'iso-8859-1')])
        rendered = str(make_header(chunks))
        self.assertEqual(rendered,
                         'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        # Encoded words glued to the surrounding text: the header must come
        # back undecoded, as one chunk.
        raw = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        self.assertEqual(decode_header(raw), [(raw, None)])

    def test_rfc2047_with_whitespace(self):
        # The same words separated by whitespace are decoded one by one.
        raw = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        expected = [
            (b'Sm', None), (b'\xf6', 'iso-8859-1'),
            (b'rg', None), (b'\xe5', 'iso-8859-1'),
            (b'sbord', None)]
        self.assertEqual(decode_header(raw), expected)

    def test_rfc2047_B_bad_padding(self):
        # Base64 words with missing or partial '=' padding must still
        # decode.
        template = '=?iso-8859-1?B?%s?='
        cases = [                                # only test complete bytes
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi')
            ]
        for encoded, decoded in cases:
            chunks = decode_header(template % encoded)
            self.assertEqual(chunks, [(decoded, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004.
        # NOTE(review): 'iso-8659-1' looks like a typo for 'iso-8859-1';
        # it is used consistently in input and expectation -- confirm
        # before changing.
        raw = '=?iso-8659-1?Q?andr=e9=zz?='
        expected = [(b'andr\xe9=zz', 'iso-8659-1')]
        self.assertEqual(decode_header(raw), expected)
1977
Ezio Melottib3aedd42010-11-20 19:04:17 +00001978
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001979# Test the MIMEMessage class
class TestMIMEMessage(TestEmailBase):
    """Tests for MIMEMessage and message/* parts."""

    def setUp(self):
        """Read the raw text of msg_11.txt into self._text."""
        with openfile('msg_11.txt') as fp:
            self._text = fp.read()

    def _check_digest_default_types(self, filename):
        """Parse data file *filename* and check its first two parts.

        Both parts must report message/rfc822 as default and content
        type, and the first sub-part of each must report text/plain.
        """
        eq = self.assertEqual
        with openfile(filename) as fp:
            msg = email.message_from_file(fp)
        for index in (0, 1):
            container = msg.get_payload(index)
            eq(container.get_default_type(), 'message/rfc822')
            eq(container.get_content_type(), 'message/rfc822')
            inner = container.get_payload(0)
            eq(inner.get_default_type(), 'text/plain')
            eq(inner.get_content_type(), 'text/plain')

    def test_type_error(self):
        # MIMEMessage must reject a plain string argument.
        self.assertRaises(TypeError, MIMEMessage, 'a plain string')

    def test_valid_argument(self):
        # Wrapping a Message yields a message/rfc822 part whose payload is
        # a one-element list holding that very Message object.
        eq = self.assertEqual
        subject = 'A sub-message'
        m = Message()
        m['Subject'] = subject
        r = MIMEMessage(m)
        eq(r.get_content_type(), 'message/rfc822')
        payload = r.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subpart = payload[0]
        self.assertIs(subpart, m)
        eq(subpart['subject'], subject)

    def test_bad_multipart(self):
        # Attaching a second message to a MIMEMessage must fail.
        msg1 = Message()
        msg1['Subject'] = 'subpart 1'
        msg2 = Message()
        msg2['Subject'] = 'subpart 2'
        r = MIMEMessage(msg1)
        self.assertRaises(errors.MultipartConversionError, r.attach, msg2)

    def test_generate(self):
        # First craft the message to be encapsulated
        m = Message()
        m['Subject'] = 'An enclosed message'
        m.set_payload('Here is the body of the message.\n')
        r = MIMEMessage(m)
        r['Subject'] = 'The enclosing message'
        s = StringIO()
        g = Generator(s)
        g.flatten(r)
        self.assertEqual(s.getvalue(), """\
Content-Type: message/rfc822
MIME-Version: 1.0
Subject: The enclosing message

Subject: An enclosed message

Here is the body of the message.
""")

    def test_parse_message_rfc822(self):
        # msg_11.txt parses to a message/rfc822 part whose payload is a
        # one-element list holding the enclosed Message.
        eq = self.assertEqual
        msg = self._msgobj('msg_11.txt')
        eq(msg.get_content_type(), 'message/rfc822')
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        submsg = payload[0]
        self.assertIsInstance(submsg, Message)
        eq(submsg['subject'], 'An enclosed message')
        eq(submsg.get_payload(), 'Here is the body of the message.\n')

    def test_dsn(self):
        eq = self.assertEqual
        # msg 16 is a Delivery Status Notification, see RFC 1894
        msg = self._msgobj('msg_16.txt')
        eq(msg.get_content_type(), 'multipart/report')
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 3)
        # Subpart 1 is a text/plain, human readable section
        subpart = msg.get_payload(0)
        eq(subpart.get_content_type(), 'text/plain')
        eq(subpart.get_payload(), """\
This report relates to a message you sent with the following header fields:

 Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
 Date: Sun, 23 Sep 2001 20:10:55 -0700
 From: "Ian T. Henry" <henryi@oxy.edu>
 To: SoCal Raves <scr@socal-raves.org>
 Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

 Recipient address: jangel1@cougar.noc.ucla.edu
 Reason: recipient reached disk quota

""")
        # Subpart 2 contains the machine parsable DSN information.  It
        # consists of two blocks of headers, represented by two nested Message
        # objects.
        subpart = msg.get_payload(1)
        eq(subpart.get_content_type(), 'message/delivery-status')
        eq(len(subpart.get_payload()), 2)
        # message/delivery-status should treat each block as a bunch of
        # headers, i.e. a bunch of Message objects.
        dsn1 = subpart.get_payload(0)
        self.assertIsInstance(dsn1, Message)
        eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
        eq(dsn1.get_param('dns', header='reporting-mta'), '')
        # Try a missing one <wink>
        eq(dsn1.get_param('nsd', header='reporting-mta'), None)
        dsn2 = subpart.get_payload(1)
        self.assertIsInstance(dsn2, Message)
        eq(dsn2['action'], 'failed')
        eq(dsn2.get_params(header='original-recipient'),
           [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
        eq(dsn2.get_param('rfc822', header='final-recipient'), '')
        # Subpart 3 is the original message
        subpart = msg.get_payload(2)
        eq(subpart.get_content_type(), 'message/rfc822')
        payload = subpart.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subsubpart = payload[0]
        self.assertIsInstance(subsubpart, Message)
        eq(subsubpart.get_content_type(), 'text/plain')
        eq(subsubpart['message-id'],
           '<002001c144a6$8752e060$56104586@oxy.edu>')

    def test_epilogue(self):
        # A hand-built multipart with a preamble and an epilogue must
        # flatten to exactly the contents of msg_21.txt.
        eq = self.ndiffAssertEqual
        with openfile('msg_21.txt') as fp:
            text = fp.read()
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = 'End of MIME message\n'
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), text)

    def test_no_nl_preamble(self):
        # The preamble has no trailing newline; the expected output puts
        # the first boundary on the line after it.
        eq = self.ndiffAssertEqual
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = ''
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
""")

    def test_default_type(self):
        self._check_digest_default_types('msg_30.txt')

    def test_default_type_with_explicit_container_type(self):
        self._check_digest_default_types('msg_28.txt')

    def test_default_type_non_parsed(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # Set up container
        container = MIMEMultipart('digest', 'BOUNDARY')
        container.epilogue = ''
        # Set up subparts
        subpart1a = MIMEText('message 1\n')
        subpart2a = MIMEText('message 2\n')
        subpart1 = MIMEMessage(subpart1a)
        subpart2 = MIMEMessage(subpart2a)
        container.attach(subpart1)
        container.attach(subpart2)
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
        # Drop the explicit headers from the sub-parts: the reported types
        # must stay message/rfc822 and the headers vanish from the output.
        del subpart1['content-type']
        del subpart1['mime-version']
        del subpart2['content-type']
        del subpart2['mime-version']
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')

    def test_mime_attachments_in_constructor(self):
        # Parts passed through _subparts become the payload, in order.
        eq = self.assertEqual
        text1 = MIMEText('')
        text2 = MIMEText('')
        msg = MIMEMultipart(_subparts=(text1, text2))
        eq(len(msg.get_payload()), 2)
        eq(msg.get_payload(0), text1)
        eq(msg.get_payload(1), text2)

    def test_default_multipart_constructor(self):
        # A MIMEMultipart built with no arguments is already multipart.
        msg = MIMEMultipart()
        self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002278
Ezio Melottib3aedd42010-11-20 19:04:17 +00002279
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002280# A general test of parser->model->generator idempotency. IOW, read a message
2281# in, parse it into a message object tree, then without touching the tree,
2282# regenerate the plain text. The original text and the transformed text
2283# should be identical. Note: that we ignore the Unix-From since that may
2284# contain a changed date.
2285class TestIdempotent(TestEmailBase):
R. David Murray719a4492010-11-21 16:53:48 +00002286
2287 linesep = '\n'
2288
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002289 def _msgobj(self, filename):
2290 with openfile(filename) as fp:
2291 data = fp.read()
2292 msg = email.message_from_string(data)
2293 return msg, data
2294
R. David Murray719a4492010-11-21 16:53:48 +00002295 def _idempotent(self, msg, text, unixfrom=False):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002296 eq = self.ndiffAssertEqual
2297 s = StringIO()
2298 g = Generator(s, maxheaderlen=0)
R. David Murray719a4492010-11-21 16:53:48 +00002299 g.flatten(msg, unixfrom=unixfrom)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002300 eq(text, s.getvalue())
2301
2302 def test_parse_text_message(self):
Ezio Melottib3aedd42010-11-20 19:04:17 +00002303 eq = self.assertEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002304 msg, text = self._msgobj('msg_01.txt')
2305 eq(msg.get_content_type(), 'text/plain')
2306 eq(msg.get_content_maintype(), 'text')
2307 eq(msg.get_content_subtype(), 'plain')
2308 eq(msg.get_params()[1], ('charset', 'us-ascii'))
2309 eq(msg.get_param('charset'), 'us-ascii')
2310 eq(msg.preamble, None)
2311 eq(msg.epilogue, None)
2312 self._idempotent(msg, text)
2313
2314 def test_parse_untyped_message(self):
Ezio Melottib3aedd42010-11-20 19:04:17 +00002315 eq = self.assertEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002316 msg, text = self._msgobj('msg_03.txt')
2317 eq(msg.get_content_type(), 'text/plain')
2318 eq(msg.get_params(), None)
2319 eq(msg.get_param('charset'), None)
2320 self._idempotent(msg, text)
2321
2322 def test_simple_multipart(self):
2323 msg, text = self._msgobj('msg_04.txt')
2324 self._idempotent(msg, text)
2325
2326 def test_MIME_digest(self):
2327 msg, text = self._msgobj('msg_02.txt')
2328 self._idempotent(msg, text)
2329
2330 def test_long_header(self):
2331 msg, text = self._msgobj('msg_27.txt')
2332 self._idempotent(msg, text)
2333
2334 def test_MIME_digest_with_part_headers(self):
2335 msg, text = self._msgobj('msg_28.txt')
2336 self._idempotent(msg, text)
2337
2338 def test_mixed_with_image(self):
2339 msg, text = self._msgobj('msg_06.txt')
2340 self._idempotent(msg, text)
2341
2342 def test_multipart_report(self):
2343 msg, text = self._msgobj('msg_05.txt')
2344 self._idempotent(msg, text)
2345
2346 def test_dsn(self):
2347 msg, text = self._msgobj('msg_16.txt')
2348 self._idempotent(msg, text)
2349
2350 def test_preamble_epilogue(self):
2351 msg, text = self._msgobj('msg_21.txt')
2352 self._idempotent(msg, text)
2353
2354 def test_multipart_one_part(self):
2355 msg, text = self._msgobj('msg_23.txt')
2356 self._idempotent(msg, text)
2357
2358 def test_multipart_no_parts(self):
2359 msg, text = self._msgobj('msg_24.txt')
2360 self._idempotent(msg, text)
2361
2362 def test_no_start_boundary(self):
2363 msg, text = self._msgobj('msg_31.txt')
2364 self._idempotent(msg, text)
2365
2366 def test_rfc2231_charset(self):
2367 msg, text = self._msgobj('msg_32.txt')
2368 self._idempotent(msg, text)
2369
2370 def test_more_rfc2231_parameters(self):
2371 msg, text = self._msgobj('msg_33.txt')
2372 self._idempotent(msg, text)
2373
2374 def test_text_plain_in_a_multipart_digest(self):
2375 msg, text = self._msgobj('msg_34.txt')
2376 self._idempotent(msg, text)
2377
2378 def test_nested_multipart_mixeds(self):
2379 msg, text = self._msgobj('msg_12a.txt')
2380 self._idempotent(msg, text)
2381
2382 def test_message_external_body_idempotent(self):
2383 msg, text = self._msgobj('msg_36.txt')
2384 self._idempotent(msg, text)
2385
R. David Murray719a4492010-11-21 16:53:48 +00002386 def test_message_delivery_status(self):
2387 msg, text = self._msgobj('msg_43.txt')
2388 self._idempotent(msg, text, unixfrom=True)
2389
R. David Murray96fd54e2010-10-08 15:55:28 +00002390 def test_message_signed_idempotent(self):
2391 msg, text = self._msgobj('msg_45.txt')
2392 self._idempotent(msg, text)
2393
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002394 def test_content_type(self):
Ezio Melottib3aedd42010-11-20 19:04:17 +00002395 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002396 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002397 # Get a message object and reset the seek pointer for other tests
2398 msg, text = self._msgobj('msg_05.txt')
2399 eq(msg.get_content_type(), 'multipart/report')
2400 # Test the Content-Type: parameters
2401 params = {}
2402 for pk, pv in msg.get_params():
2403 params[pk] = pv
2404 eq(params['report-type'], 'delivery-status')
2405 eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
R. David Murray719a4492010-11-21 16:53:48 +00002406 eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep)
2407 eq(msg.epilogue, self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002408 eq(len(msg.get_payload()), 3)
2409 # Make sure the subparts are what we expect
2410 msg1 = msg.get_payload(0)
2411 eq(msg1.get_content_type(), 'text/plain')
R. David Murray719a4492010-11-21 16:53:48 +00002412 eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002413 msg2 = msg.get_payload(1)
2414 eq(msg2.get_content_type(), 'text/plain')
R. David Murray719a4492010-11-21 16:53:48 +00002415 eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002416 msg3 = msg.get_payload(2)
2417 eq(msg3.get_content_type(), 'message/rfc822')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002418 self.assertTrue(isinstance(msg3, Message))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002419 payload = msg3.get_payload()
2420 unless(isinstance(payload, list))
2421 eq(len(payload), 1)
2422 msg4 = payload[0]
2423 unless(isinstance(msg4, Message))
R. David Murray719a4492010-11-21 16:53:48 +00002424 eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002425
2426 def test_parser(self):
Ezio Melottib3aedd42010-11-20 19:04:17 +00002427 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002428 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002429 msg, text = self._msgobj('msg_06.txt')
2430 # Check some of the outer headers
2431 eq(msg.get_content_type(), 'message/rfc822')
2432 # Make sure the payload is a list of exactly one sub-Message, and that
2433 # that submessage has a type of text/plain
2434 payload = msg.get_payload()
2435 unless(isinstance(payload, list))
2436 eq(len(payload), 1)
2437 msg1 = payload[0]
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002438 self.assertTrue(isinstance(msg1, Message))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002439 eq(msg1.get_content_type(), 'text/plain')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002440 self.assertTrue(isinstance(msg1.get_payload(), str))
R. David Murray719a4492010-11-21 16:53:48 +00002441 eq(msg1.get_payload(), self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002442
2443
Ezio Melottib3aedd42010-11-20 19:04:17 +00002444
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002445# Test various other bits of the package's functionality
2446class TestMiscellaneous(TestEmailBase):
2447 def test_message_from_string(self):
2448 with openfile('msg_01.txt') as fp:
2449 text = fp.read()
2450 msg = email.message_from_string(text)
2451 s = StringIO()
2452 # Don't wrap/continue long headers since we're trying to test
2453 # idempotency.
2454 g = Generator(s, maxheaderlen=0)
2455 g.flatten(msg)
2456 self.assertEqual(text, s.getvalue())
2457
2458 def test_message_from_file(self):
2459 with openfile('msg_01.txt') as fp:
2460 text = fp.read()
2461 fp.seek(0)
2462 msg = email.message_from_file(fp)
2463 s = StringIO()
2464 # Don't wrap/continue long headers since we're trying to test
2465 # idempotency.
2466 g = Generator(s, maxheaderlen=0)
2467 g.flatten(msg)
2468 self.assertEqual(text, s.getvalue())
2469
2470 def test_message_from_string_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002471 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002472 with openfile('msg_01.txt') as fp:
2473 text = fp.read()
2474
2475 # Create a subclass
2476 class MyMessage(Message):
2477 pass
2478
2479 msg = email.message_from_string(text, MyMessage)
2480 unless(isinstance(msg, MyMessage))
2481 # Try something more complicated
2482 with openfile('msg_02.txt') as fp:
2483 text = fp.read()
2484 msg = email.message_from_string(text, MyMessage)
2485 for subpart in msg.walk():
2486 unless(isinstance(subpart, MyMessage))
2487
2488 def test_message_from_file_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002489 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002490 # Create a subclass
2491 class MyMessage(Message):
2492 pass
2493
2494 with openfile('msg_01.txt') as fp:
2495 msg = email.message_from_file(fp, MyMessage)
2496 unless(isinstance(msg, MyMessage))
2497 # Try something more complicated
2498 with openfile('msg_02.txt') as fp:
2499 msg = email.message_from_file(fp, MyMessage)
2500 for subpart in msg.walk():
2501 unless(isinstance(subpart, MyMessage))
2502
2503 def test__all__(self):
2504 module = __import__('email')
2505 # Can't use sorted() here due to Python 2.3 compatibility
2506 all = module.__all__[:]
2507 all.sort()
2508 self.assertEqual(all, [
2509 'base64mime', 'charset', 'encoders', 'errors', 'generator',
R. David Murray96fd54e2010-10-08 15:55:28 +00002510 'header', 'iterators', 'message', 'message_from_binary_file',
2511 'message_from_bytes', 'message_from_file',
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002512 'message_from_string', 'mime', 'parser',
2513 'quoprimime', 'utils',
2514 ])
2515
2516 def test_formatdate(self):
2517 now = time.time()
2518 self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
2519 time.gmtime(now)[:6])
2520
2521 def test_formatdate_localtime(self):
2522 now = time.time()
2523 self.assertEqual(
2524 utils.parsedate(utils.formatdate(now, localtime=True))[:6],
2525 time.localtime(now)[:6])
2526
2527 def test_formatdate_usegmt(self):
2528 now = time.time()
2529 self.assertEqual(
2530 utils.formatdate(now, localtime=False),
2531 time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
2532 self.assertEqual(
2533 utils.formatdate(now, localtime=False, usegmt=True),
2534 time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
2535
2536 def test_parsedate_none(self):
2537 self.assertEqual(utils.parsedate(''), None)
2538
2539 def test_parsedate_compact(self):
2540 # The FWS after the comma is optional
2541 self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
2542 utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
2543
2544 def test_parsedate_no_dayofweek(self):
2545 eq = self.assertEqual
2546 eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
2547 (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
2548
2549 def test_parsedate_compact_no_dayofweek(self):
2550 eq = self.assertEqual
2551 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
2552 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2553
R. David Murray4a62e892010-12-23 20:35:46 +00002554 def test_parsedate_no_space_before_positive_offset(self):
2555 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
2556 (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
2557
2558 def test_parsedate_no_space_before_negative_offset(self):
2559 # Issue 1155362: we already handled '+' for this case.
2560 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
2561 (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))
2562
2563
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002564 def test_parsedate_acceptable_to_time_functions(self):
2565 eq = self.assertEqual
2566 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
2567 t = int(time.mktime(timetup))
2568 eq(time.localtime(t)[:6], timetup[:6])
2569 eq(int(time.strftime('%Y', timetup)), 2003)
2570 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
2571 t = int(time.mktime(timetup[:9]))
2572 eq(time.localtime(t)[:6], timetup[:6])
2573 eq(int(time.strftime('%Y', timetup[:9])), 2003)
2574
R. David Murray219d1c82010-08-25 00:45:55 +00002575 def test_parsedate_y2k(self):
2576 """Test for parsing a date with a two-digit year.
2577
2578 Parsing a date with a two-digit year should return the correct
2579 four-digit year. RFC822 allows two-digit years, but RFC2822 (which
2580 obsoletes RFC822) requires four-digit years.
2581
2582 """
2583 self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
2584 utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
2585 self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
2586 utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
2587
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002588 def test_parseaddr_empty(self):
2589 self.assertEqual(utils.parseaddr('<>'), ('', ''))
2590 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
2591
2592 def test_noquote_dump(self):
2593 self.assertEqual(
2594 utils.formataddr(('A Silly Person', 'person@dom.ain')),
2595 'A Silly Person <person@dom.ain>')
2596
2597 def test_escape_dump(self):
2598 self.assertEqual(
2599 utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
2600 r'"A \(Very\) Silly Person" <person@dom.ain>')
2601 a = r'A \(Special\) Person'
2602 b = 'person@dom.ain'
2603 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2604
2605 def test_escape_backslashes(self):
2606 self.assertEqual(
2607 utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
2608 r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
2609 a = r'Arthur \Backslash\ Foobar'
2610 b = 'person@dom.ain'
2611 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2612
2613 def test_name_with_dot(self):
2614 x = 'John X. Doe <jxd@example.com>'
2615 y = '"John X. Doe" <jxd@example.com>'
2616 a, b = ('John X. Doe', 'jxd@example.com')
2617 self.assertEqual(utils.parseaddr(x), (a, b))
2618 self.assertEqual(utils.parseaddr(y), (a, b))
2619 # formataddr() quotes the name if there's a dot in it
2620 self.assertEqual(utils.formataddr((a, b)), y)
2621
R. David Murray5397e862010-10-02 15:58:26 +00002622 def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
2623 # issue 10005. Note that in the third test the second pair of
2624 # backslashes is not actually a quoted pair because it is not inside a
2625 # comment or quoted string: the address being parsed has a quoted
2626 # string containing a quoted backslash, followed by 'example' and two
2627 # backslashes, followed by another quoted string containing a space and
2628 # the word 'example'. parseaddr copies those two backslashes
2629 # literally. Per rfc5322 this is not technically correct since a \ may
2630 # not appear in an address outside of a quoted string. It is probably
2631 # a sensible Postel interpretation, though.
2632 eq = self.assertEqual
2633 eq(utils.parseaddr('""example" example"@example.com'),
2634 ('', '""example" example"@example.com'))
2635 eq(utils.parseaddr('"\\"example\\" example"@example.com'),
2636 ('', '"\\"example\\" example"@example.com'))
2637 eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
2638 ('', '"\\\\"example\\\\" example"@example.com'))
2639
R. David Murray63563cd2010-12-18 18:25:38 +00002640 def test_parseaddr_preserves_spaces_in_local_part(self):
2641 # issue 9286. A normal RFC5322 local part should not contain any
2642 # folding white space, but legacy local parts can (they are a sequence
2643 # of atoms, not dotatoms). On the other hand we strip whitespace from
2644 # before the @ and around dots, on the assumption that the whitespace
2645 # around the punctuation is a mistake in what would otherwise be
2646 # an RFC5322 local part. Leading whitespace is, usual, stripped as well.
2647 self.assertEqual(('', "merwok wok@xample.com"),
2648 utils.parseaddr("merwok wok@xample.com"))
2649 self.assertEqual(('', "merwok wok@xample.com"),
2650 utils.parseaddr("merwok wok@xample.com"))
2651 self.assertEqual(('', "merwok wok@xample.com"),
2652 utils.parseaddr(" merwok wok @xample.com"))
2653 self.assertEqual(('', 'merwok"wok" wok@xample.com'),
2654 utils.parseaddr('merwok"wok" wok@xample.com'))
2655 self.assertEqual(('', 'merwok.wok.wok@xample.com'),
2656 utils.parseaddr('merwok. wok . wok@xample.com'))
2657
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002658 def test_multiline_from_comment(self):
2659 x = """\
2660Foo
2661\tBar <foo@example.com>"""
2662 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
2663
2664 def test_quote_dump(self):
2665 self.assertEqual(
2666 utils.formataddr(('A Silly; Person', 'person@dom.ain')),
2667 r'"A Silly; Person" <person@dom.ain>')
2668
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002669 def test_charset_richcomparisons(self):
2670 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002671 ne = self.assertNotEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002672 cset1 = Charset()
2673 cset2 = Charset()
2674 eq(cset1, 'us-ascii')
2675 eq(cset1, 'US-ASCII')
2676 eq(cset1, 'Us-AsCiI')
2677 eq('us-ascii', cset1)
2678 eq('US-ASCII', cset1)
2679 eq('Us-AsCiI', cset1)
2680 ne(cset1, 'usascii')
2681 ne(cset1, 'USASCII')
2682 ne(cset1, 'UsAsCiI')
2683 ne('usascii', cset1)
2684 ne('USASCII', cset1)
2685 ne('UsAsCiI', cset1)
2686 eq(cset1, cset2)
2687 eq(cset2, cset1)
2688
2689 def test_getaddresses(self):
2690 eq = self.assertEqual
2691 eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
2692 'Bud Person <bperson@dom.ain>']),
2693 [('Al Person', 'aperson@dom.ain'),
2694 ('Bud Person', 'bperson@dom.ain')])
2695
2696 def test_getaddresses_nasty(self):
2697 eq = self.assertEqual
2698 eq(utils.getaddresses(['foo: ;']), [('', '')])
2699 eq(utils.getaddresses(
2700 ['[]*-- =~$']),
2701 [('', ''), ('', ''), ('', '*--')])
2702 eq(utils.getaddresses(
2703 ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
2704 [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
2705
2706 def test_getaddresses_embedded_comment(self):
2707 """Test proper handling of a nested comment"""
2708 eq = self.assertEqual
2709 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
2710 eq(addrs[0][1], 'foo@bar.com')
2711
2712 def test_utils_quote_unquote(self):
2713 eq = self.assertEqual
2714 msg = Message()
2715 msg.add_header('content-disposition', 'attachment',
2716 filename='foo\\wacky"name')
2717 eq(msg.get_filename(), 'foo\\wacky"name')
2718
2719 def test_get_body_encoding_with_bogus_charset(self):
2720 charset = Charset('not a charset')
2721 self.assertEqual(charset.get_body_encoding(), 'base64')
2722
2723 def test_get_body_encoding_with_uppercase_charset(self):
2724 eq = self.assertEqual
2725 msg = Message()
2726 msg['Content-Type'] = 'text/plain; charset=UTF-8'
2727 eq(msg['content-type'], 'text/plain; charset=UTF-8')
2728 charsets = msg.get_charsets()
2729 eq(len(charsets), 1)
2730 eq(charsets[0], 'utf-8')
2731 charset = Charset(charsets[0])
2732 eq(charset.get_body_encoding(), 'base64')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00002733 msg.set_payload(b'hello world', charset=charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002734 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
2735 eq(msg.get_payload(decode=True), b'hello world')
2736 eq(msg['content-transfer-encoding'], 'base64')
2737 # Try another one
2738 msg = Message()
2739 msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
2740 charsets = msg.get_charsets()
2741 eq(len(charsets), 1)
2742 eq(charsets[0], 'us-ascii')
2743 charset = Charset(charsets[0])
2744 eq(charset.get_body_encoding(), encoders.encode_7or8bit)
2745 msg.set_payload('hello world', charset=charset)
2746 eq(msg.get_payload(), 'hello world')
2747 eq(msg['content-transfer-encoding'], '7bit')
2748
2749 def test_charsets_case_insensitive(self):
2750 lc = Charset('us-ascii')
2751 uc = Charset('US-ASCII')
2752 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
2753
2754 def test_partial_falls_inside_message_delivery_status(self):
2755 eq = self.ndiffAssertEqual
2756 # The Parser interface provides chunks of data to FeedParser in 8192
2757 # byte gulps. SF bug #1076485 found one of those chunks inside
2758 # message/delivery-status header block, which triggered an
2759 # unreadline() of NeedMoreData.
2760 msg = self._msgobj('msg_43.txt')
2761 sfp = StringIO()
2762 iterators._structure(msg, sfp)
2763 eq(sfp.getvalue(), """\
2764multipart/report
2765 text/plain
2766 message/delivery-status
2767 text/plain
2768 text/plain
2769 text/plain
2770 text/plain
2771 text/plain
2772 text/plain
2773 text/plain
2774 text/plain
2775 text/plain
2776 text/plain
2777 text/plain
2778 text/plain
2779 text/plain
2780 text/plain
2781 text/plain
2782 text/plain
2783 text/plain
2784 text/plain
2785 text/plain
2786 text/plain
2787 text/plain
2788 text/plain
2789 text/plain
2790 text/plain
2791 text/plain
2792 text/plain
2793 text/rfc822-headers
2794""")
2795
R. David Murraya0b44b52010-12-02 21:47:19 +00002796 def test_make_msgid_domain(self):
2797 self.assertEqual(
2798 email.utils.make_msgid(domain='testdomain-string')[-19:],
2799 '@testdomain-string>')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002800
Ezio Melottib3aedd42010-11-20 19:04:17 +00002801
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002802# Test the iterator/generators
class TestIterators(TestEmailBase):
    """Tests for email.iterators and the feed parser's line buffering."""

    def test_body_line_iterator(self):
        """body_line_iterator() must yield every body line, in order."""
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # First a simple non-multipart message
        msg = self._msgobj('msg_01.txt')
        it = iterators.body_line_iterator(msg)
        lines = list(it)
        eq(len(lines), 6)
        neq(EMPTYSTRING.join(lines), msg.get_payload())
        # Now a more complicated multipart
        msg = self._msgobj('msg_02.txt')
        it = iterators.body_line_iterator(msg)
        lines = list(it)
        eq(len(lines), 43)
        with openfile('msg_19.txt') as fp:
            neq(EMPTYSTRING.join(lines), fp.read())

    def test_typed_subpart_iterator(self):
        """typed_subpart_iterator(msg, 'text') must find both text parts."""
        eq = self.assertEqual
        msg = self._msgobj('msg_04.txt')
        it = iterators.typed_subpart_iterator(msg, 'text')
        lines = []
        subparts = 0
        for subpart in it:
            subparts += 1
            lines.append(subpart.get_payload())
        eq(subparts, 2)
        eq(EMPTYSTRING.join(lines), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        """typed_subpart_iterator() must match msg_03.txt as text/plain."""
        eq = self.assertEqual
        msg = self._msgobj('msg_03.txt')
        it = iterators.typed_subpart_iterator(msg, 'text', 'plain')
        lines = []
        subparts = 0
        for subpart in it:
            subparts += 1
            lines.append(subpart.get_payload())
        eq(subparts, 1)
        eq(EMPTYSTRING.join(lines), """\

Hi,

Do you like this message?

-Me
""")

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
           line endings.  A CR ending one push() followed by a LF starting
           the next push() added an empty line.
        '''
        # Each entry is (text pushed, number of complete lines that push
        # is expected to make readable).
        pushes = [
            ("a\r \n", 2),
            ("b", 0),
            ("c\n", 1),
            ("", 0),
            ("d\r\n", 1),
            ("e\r", 0),
            ("\nf", 1),
            ("\r\n", 1),
          ]
        from email.feedparser import BufferedSubFile, NeedMoreData
        bsf = BufferedSubFile()
        lines_read = []
        expected_total = 0
        for chunk, expected in pushes:
            bsf.push(chunk)
            expected_total += expected
            got = 0
            while True:
                line = bsf.readline()
                # NeedMoreData is a sentinel object, so test identity.
                if line is NeedMoreData:
                    break
                lines_read.append(line)
                got += 1
            # assertEqual reports both values when the counts disagree.
            self.assertEqual(expected, got)
        self.assertEqual(len(lines_read), expected_total)
        self.assertEqual(''.join(chunk for chunk, _ in pushes),
                         ''.join(lines_read))
2889
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002890
Ezio Melottib3aedd42010-11-20 19:04:17 +00002891
class TestParsers(TestEmailBase):
    """Tests for Parser, HeaderParser and message_from_string() parsing."""

    # Show complete diffs when a comparison of long strings fails.
    maxDiff = None

    def test_header_parser(self):
        """HeaderParser must read headers and leave the body as one string."""
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt') as fp:
            msg = HeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)

    def test_whitespace_continuation(self):
        """A whitespace-only line after a header must continue that header."""
        eq = self.assertEqual
        # This message contains a line after the Subject: header that has only
        # whitespace, but it is not empty!
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: the next line has a space on it
\x20
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        """Same as above, with the continued header placed last."""
        eq = self.assertEqual
        # Like the previous test, but the subject line is the last
        # header.
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam
Subject: the next line has a space on it
\x20

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        """A CRLF-delimited multipart must split into parts, keeping CRLFs."""
        eq = self.assertEqual
        # newline='\n' stops the file object from translating the CRLFs.
        with openfile('msg_26.txt', newline='\n') as fp:
            msg = Parser().parse(fp)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'text/plain')
        eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'application/riscos')

    def test_crlf_flatten(self):
        """Flattening with linesep='\\r\\n' must reproduce the CRLF input."""
        # Using newline='\n' preserves the crlfs in this input file.
        with openfile('msg_26.txt', newline='\n') as fp:
            text = fp.read()
        msg = email.message_from_string(text)
        s = StringIO()
        g = Generator(s)
        g.flatten(msg, linesep='\r\n')
        self.assertEqual(s.getvalue(), text)

    def test_multipart_digest_with_extra_mime_headers(self):
        """msg_28.txt must parse to a digest of two message/rfc822 parts."""
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'message/rfc822')
        self.assertTrue(part1.is_multipart())
        eq(len(part1.get_payload()), 1)
        part1a = part1.get_payload(0)
        self.assertFalse(part1a.is_multipart())
        eq(part1a.get_content_type(), 'text/plain')
        neq(part1a.get_payload(), 'message 1\n')
        # next message/rfc822
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'message/rfc822')
        self.assertTrue(part2.is_multipart())
        eq(len(part2.get_payload()), 1)
        part2a = part2.get_payload(0)
        self.assertFalse(part2a.is_multipart())
        eq(part2a.get_content_type(), 'text/plain')
        neq(part2a.get_payload(), 'message 2\n')

    def test_three_lines(self):
        """A headers-only message with no final newline must still parse."""
        # A bug report by Andrew McNamara
        lines = ['From: Andrew Person <aperson@dom.ain',
                 'Subject: Test',
                 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
        msg = email.message_from_string(NL.join(lines))
        self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        """Header values must not keep the CRLF that ended their line."""
        eq = self.assertEqual
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        value1 = 'text'
        value2 = 'more text'
        m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            value1, value2)
        msg = email.message_from_string(m)
        eq(msg.get('Header'), value1)
        eq(msg.get('Next-Header'), value2)

    def test_rfc2822_header_syntax(self):
        """Header names made of unusual printable characters are accepted."""
        eq = self.assertEqual
        m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg), 3)
        # Iterating a message yields its header names.
        eq(sorted(msg), ['!"#QUX;~', '>From', 'From'])
        eq(msg.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        """A first line with a space before the colon yields no headers."""
        eq = self.assertEqual
        m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg.keys()), 0)

    def test_rfc2822_one_character_header(self):
        """Single-character header names must be parsed as headers."""
        eq = self.assertEqual
        m = 'A: first header\nB: second header\nCC: third header\n\nbody'
        msg = email.message_from_string(m)
        # sorted() works on any iterable, so this does not rely on keys()
        # returning a mutable list.
        headers = sorted(msg.keys())
        eq(headers, ['A', 'B', 'CC'])
        eq(msg.get_payload(), 'body')

    def test_CRLFLF_at_end_of_part(self):
        """A part body ending in CRLF then LF must keep its CRLF."""
        # issue 5610: feedparser should not eat two chars from body part ending
        # with "\r\n\n".
        m = (
            "From: foo@bar.com\n"
            "To: baz\n"
            "Mime-Version: 1.0\n"
            "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
            "\n"
            "--BOUNDARY\n"
            "Content-Type: text/plain\n"
            "\n"
            "body ending with CRLF newline\r\n"
            "\n"
            "--BOUNDARY--\n"
          )
        msg = email.message_from_string(m)
        self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003054
Ezio Melottib3aedd42010-11-20 19:04:17 +00003055
R. David Murray96fd54e2010-10-08 15:55:28 +00003056class Test8BitBytesHandling(unittest.TestCase):
3057 # In Python3 all input is string, but that doesn't work if the actual input
3058 # uses an 8bit transfer encoding. To hack around that, in email 5.1 we
3059 # decode byte streams using the surrogateescape error handler, and
3060 # reconvert to binary at appropriate places if we detect surrogates. This
3061 # doesn't allow us to transform headers with 8bit bytes (they get munged),
3062 # but it does allow us to parse and preserve them, and to decode body
3063 # parts that use an 8bit CTE.
3064
3065 bodytest_msg = textwrap.dedent("""\
3066 From: foo@bar.com
3067 To: baz
3068 Mime-Version: 1.0
3069 Content-Type: text/plain; charset={charset}
3070 Content-Transfer-Encoding: {cte}
3071
3072 {bodyline}
3073 """)
3074
3075 def test_known_8bit_CTE(self):
3076 m = self.bodytest_msg.format(charset='utf-8',
3077 cte='8bit',
3078 bodyline='pöstal').encode('utf-8')
3079 msg = email.message_from_bytes(m)
3080 self.assertEqual(msg.get_payload(), "pöstal\n")
3081 self.assertEqual(msg.get_payload(decode=True),
3082 "pöstal\n".encode('utf-8'))
3083
3084 def test_unknown_8bit_CTE(self):
3085 m = self.bodytest_msg.format(charset='notavalidcharset',
3086 cte='8bit',
3087 bodyline='pöstal').encode('utf-8')
3088 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00003089 self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n")
R. David Murray96fd54e2010-10-08 15:55:28 +00003090 self.assertEqual(msg.get_payload(decode=True),
3091 "pöstal\n".encode('utf-8'))
3092
3093 def test_8bit_in_quopri_body(self):
3094 # This is non-RFC compliant data...without 'decode' the library code
3095 # decodes the body using the charset from the headers, and because the
3096 # source byte really is utf-8 this works. This is likely to fail
3097 # against real dirty data (ie: produce mojibake), but the data is
3098 # invalid anyway so it is as good a guess as any. But this means that
3099 # this test just confirms the current behavior; that behavior is not
3100 # necessarily the best possible behavior. With 'decode' it is
3101 # returning the raw bytes, so that test should be of correct behavior,
3102 # or at least produce the same result that email4 did.
3103 m = self.bodytest_msg.format(charset='utf-8',
3104 cte='quoted-printable',
3105 bodyline='p=C3=B6stál').encode('utf-8')
3106 msg = email.message_from_bytes(m)
3107 self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
3108 self.assertEqual(msg.get_payload(decode=True),
3109 'pöstál\n'.encode('utf-8'))
3110
3111 def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
3112 # This is similar to the previous test, but proves that if the 8bit
3113 # byte is undecodeable in the specified charset, it gets replaced
3114 # by the unicode 'unknown' character. Again, this may or may not
3115 # be the ideal behavior. Note that if decode=False none of the
3116 # decoders will get involved, so this is the only test we need
3117 # for this behavior.
3118 m = self.bodytest_msg.format(charset='ascii',
3119 cte='quoted-printable',
3120 bodyline='p=C3=B6stál').encode('utf-8')
3121 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00003122 self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
R. David Murray96fd54e2010-10-08 15:55:28 +00003123 self.assertEqual(msg.get_payload(decode=True),
3124 'pöstál\n'.encode('utf-8'))
3125
3126 def test_8bit_in_base64_body(self):
3127 # Sticking an 8bit byte in a base64 block makes it undecodable by
3128 # normal means, so the block is returned undecoded, but as bytes.
3129 m = self.bodytest_msg.format(charset='utf-8',
3130 cte='base64',
3131 bodyline='cMO2c3RhbAá=').encode('utf-8')
3132 msg = email.message_from_bytes(m)
3133 self.assertEqual(msg.get_payload(decode=True),
3134 'cMO2c3RhbAá=\n'.encode('utf-8'))
3135
3136 def test_8bit_in_uuencode_body(self):
3137 # Sticking an 8bit byte in a uuencode block makes it undecodable by
3138 # normal means, so the block is returned undecoded, but as bytes.
3139 m = self.bodytest_msg.format(charset='utf-8',
3140 cte='uuencode',
3141 bodyline='<,.V<W1A; á ').encode('utf-8')
3142 msg = email.message_from_bytes(m)
3143 self.assertEqual(msg.get_payload(decode=True),
3144 '<,.V<W1A; á \n'.encode('utf-8'))
3145
3146
R. David Murray92532142011-01-07 23:25:30 +00003147 headertest_headers = (
3148 ('From: foo@bar.com', ('From', 'foo@bar.com')),
3149 ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
3150 ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
3151 '\tJean de Baddie',
3152 ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
3153 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
3154 ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
3155 ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
3156 )
3157 headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) +
3158 '\nYes, they are flying.\n').encode('utf-8')
R. David Murray96fd54e2010-10-08 15:55:28 +00003159
3160 def test_get_8bit_header(self):
3161 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003162 self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz')
3163 self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz')
R. David Murray96fd54e2010-10-08 15:55:28 +00003164
def test_print_8bit_headers(self):
    """``str(msg)`` emits each 8-bit header in its expected encoded form."""
    parsed = email.message_from_bytes(self.headertest_msg)
    # Second element of each (name, value) expectation, in header order.
    rendered = [pair[1][1] for pair in self.headertest_headers]
    template = ('From: {}\n'
                'To: {}\n'
                'Subject: {}\n'
                'From: {}\n'
                '\n'
                'Yes, they are flying.\n')
    self.assertEqual(str(parsed), template.format(*rendered))
R. David Murray96fd54e2010-10-08 15:55:28 +00003177
def test_values_with_8bit_headers(self):
    """``values()`` yields every header value with 8-bit bytes as U+FFFD."""
    parsed = email.message_from_bytes(self.headertest_msg)
    subject = ('Maintenant je vous pr\uFFFD\uFFFDsente mon '
               'coll\uFFFD\uFFFDgue, le pouf '
               'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
               '\tJean de Baddie')
    actual = [str(value) for value in parsed.values()]
    self.assertListEqual(
        actual,
        ['foo@bar.com', 'b\uFFFD\uFFFDz', subject, "g\uFFFD\uFFFDst"])
R. David Murray96fd54e2010-10-08 15:55:28 +00003188
def test_items_with_8bit_headers(self):
    """``items()`` yields (name, value) pairs with 8-bit bytes as U+FFFD."""
    parsed = email.message_from_bytes(self.headertest_msg)
    subject = ('Maintenant je vous '
               'pr\uFFFD\uFFFDsente '
               'mon coll\uFFFD\uFFFDgue, le pouf '
               'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
               '\tJean de Baddie')
    actual = [(str(name), str(value)) for name, value in parsed.items()]
    self.assertListEqual(
        actual,
        [('From', 'foo@bar.com'),
         ('To', 'b\uFFFD\uFFFDz'),
         ('Subject', subject),
         ('From', 'g\uFFFD\uFFFDst')])
R. David Murray96fd54e2010-10-08 15:55:28 +00003200
def test_get_all_with_8bit_headers(self):
    """``get_all('from')`` returns both From values, 8-bit bytes as U+FFFD."""
    parsed = email.message_from_bytes(self.headertest_msg)
    senders = [str(value) for value in parsed.get_all('from')]
    self.assertListEqual(senders, ['foo@bar.com', 'g\uFFFD\uFFFDst'])
R. David Murray96fd54e2010-10-08 15:55:28 +00003206
def test_get_content_type_with_8bit(self):
    """An 8-bit byte inside the content type is reported as U+FFFD."""
    raw = 'Content-Type: text/pl\xA7in; charset=utf-8\n'.encode('latin-1')
    parsed = email.message_from_bytes(raw)
    self.assertEqual(parsed.get_content_type(), "text/pl\uFFFDin")
    self.assertEqual(parsed.get_content_maintype(), "text")
    self.assertEqual(parsed.get_content_subtype(), "pl\uFFFDin")
3214
def test_get_params_with_8bit(self):
    """8-bit bytes in parameter names and values are reported as U+FFFD."""
    raw = 'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1')
    parsed = email.message_from_bytes(raw)
    params = parsed.get_params(header='x-header')
    self.assertEqual(
        params,
        [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])
    self.assertEqual(parsed.get_param('Foo', header='x-header'), '\uFFFdne')
    # XXX: someday you might be able to get 'b\xa7r', for now you can't.
    self.assertEqual(parsed.get_param('b\xa7r', header='x-header'), None)
3223
def test_get_rfc2231_params_with_8bit(self):
    """An RFC 2231 parameter holding an 8-bit byte decodes with U+FFFD."""
    raw = ("Content-Type: text/plain; charset=us-ascii;\n"
           " title*=us-ascii'en'This%20is%20not%20f\xa7n").encode('latin-1')
    parsed = email.message_from_bytes(raw)
    self.assertEqual(parsed.get_param('title'),
                     ('us-ascii', 'en', 'This is not f\uFFFDn'))
3231
def test_set_rfc2231_params_with_8bit(self):
    """``set_param`` can replace an RFC 2231 parameter holding 8-bit data."""
    raw = ("Content-Type: text/plain; charset=us-ascii;\n"
           " title*=us-ascii'en'This%20is%20not%20f\xa7n").encode('latin-1')
    parsed = email.message_from_bytes(raw)
    parsed.set_param('title', 'test')
    self.assertEqual(parsed.get_param('title'), 'test')
3239
def test_del_rfc2231_params_with_8bit(self):
    """``del_param`` removes an RFC 2231 parameter holding 8-bit data."""
    raw = ("Content-Type: text/plain; charset=us-ascii;\n"
           " title*=us-ascii'en'This%20is%20not%20f\xa7n").encode('latin-1')
    parsed = email.message_from_bytes(raw)
    parsed.del_param('title')
    self.assertEqual(parsed.get_param('title'), None)
    # The rest of the Content-Type header must still be usable.
    self.assertEqual(parsed.get_content_maintype(), 'text')
3248
def test_get_payload_with_8bit_cte_header(self):
    """An 8-bit byte in the CTE header leaves the payload untouched."""
    raw = ('Content-Transfer-Encoding: b\xa7se64\n'
           'Content-Type: text/plain; charset=latin-1\n'
           '\n'
           'payload\n').encode('latin-1')
    parsed = email.message_from_bytes(raw)
    self.assertEqual(parsed.get_payload(), 'payload\n')
    self.assertEqual(parsed.get_payload(decode=True), b'payload\n')
3258
# A complete message, as UTF-8 bytes, with non-ASCII text in the To and
# Subject headers and in the 8bit body.
non_latin_bin_msg = textwrap.dedent("""\
    From: foo@bar.com
    To: báz
    Subject: Maintenant je vous présente mon collègue, le pouf célèbre
    \tJean de Baddie
    Mime-Version: 1.0
    Content-Type: text/plain; charset="utf-8"
    Content-Transfer-Encoding: 8bit

    Да, они летят.
    """).encode('utf-8')
3270
def test_bytes_generator(self):
    """BytesGenerator reproduces the 8-bit source message byte for byte."""
    source = self.non_latin_bin_msg
    sink = BytesIO()
    email.generator.BytesGenerator(sink).flatten(
        email.message_from_bytes(source))
    self.assertEqual(sink.getvalue(), source)
3276
def test_bytes_generator_handles_None_body(self):
    """Flattening a brand-new, empty Message writes a lone newline."""
    # Issue 11019
    sink = BytesIO()
    empty = email.message.Message()
    email.generator.BytesGenerator(sink).flatten(empty)
    self.assertEqual(sink.getvalue(), b"\n")
3283
# Text expected when non_latin_bin_msg is flattened by the str-based
# Generator: 8-bit headers become unknown-8bit encoded words (the Subject
# wrapped over three lines) and the body is base64 encoded.
non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\
    From: foo@bar.com
    To: =?unknown-8bit?q?b=C3=A1z?=
    Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=
     =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=
     =?unknown-8bit?q?_Jean_de_Baddie?=
    Mime-Version: 1.0
    Content-Type: text/plain; charset="utf-8"
    Content-Transfer-Encoding: base64

    0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
    """)
3296
def test_generator_handles_8bit(self):
    """The str Generator re-encodes an 8-bit message into its 7-bit form."""
    parsed = email.message_from_bytes(self.non_latin_bin_msg)
    sink = StringIO()
    email.generator.Generator(sink).flatten(parsed)
    flattened = sink.getvalue()
    self.assertEqual(flattened, self.non_latin_bin_msg_as7bit_wrapped)
R. David Murray96fd54e2010-10-08 15:55:28 +00003302
def test_bytes_generator_with_unix_from(self):
    """With ``unixfrom=True`` a 'From ' line precedes the unchanged message."""
    parsed = email.message_from_bytes(self.non_latin_bin_msg)
    sink = BytesIO()
    email.generator.BytesGenerator(sink).flatten(parsed, unixfrom=True)
    # The unixfrom line carries the current date, so it cannot be compared
    # literally: check only that its first word is 'From' and that what
    # follows it is the input message.
    envelope, *remainder = sink.getvalue().split(b'\n')
    self.assertEqual(envelope.split()[0], b'From')
    self.assertEqual(b'\n'.join(remainder), self.non_latin_bin_msg)
3313
# Variant of non_latin_bin_msg_as7bit_wrapped in which the first two
# physical lines of the Subject header (list indexes 2 and 3) are replaced
# by one long line; the final Subject continuation line is kept.
non_latin_bin_msg_as7bit = non_latin_bin_msg_as7bit_wrapped.split('\n')
non_latin_bin_msg_as7bit[2:4] = [
    'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
    'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
non_latin_bin_msg_as7bit = '\n'.join(non_latin_bin_msg_as7bit)
3319
def test_message_from_binary_file(self):
    """BytesParser reads an 8-bit message from a file opened in binary mode."""
    path = 'test.msg'
    # Registered before the file exists so it is removed even on failure.
    self.addCleanup(unlink, path)
    with open(path, 'wb') as sink:
        sink.write(self.non_latin_bin_msg)
    with open(path, 'rb') as source:
        parsed = email.parser.BytesParser().parse(source)
    self.assertEqual(str(parsed), self.non_latin_bin_msg_as7bit)
3328
# A message whose 8bit body holds latin-1 encoded text, as latin-1 bytes.
latin_bin_msg = textwrap.dedent("""\
    From: foo@bar.com
    To: Dinsdale
    Subject: Nudge nudge, wink, wink
    Mime-Version: 1.0
    Content-Type: text/plain; charset="latin-1"
    Content-Transfer-Encoding: 8bit

    oh là là, know what I mean, know what I mean?
    """).encode('latin-1')

# Text expected from str() of the parsed latin_bin_msg: the charset is
# spelled iso-8859-1 and the body is quoted-printable.
latin_bin_msg_as7bit = textwrap.dedent("""\
    From: foo@bar.com
    To: Dinsdale
    Subject: Nudge nudge, wink, wink
    Mime-Version: 1.0
    Content-Type: text/plain; charset="iso-8859-1"
    Content-Transfer-Encoding: quoted-printable

    oh l=E0 l=E0, know what I mean, know what I mean?
    """)
3350
def test_string_generator_reencodes_to_quopri_when_appropriate(self):
    """``str()`` of a latin-1 8bit message gives a quoted-printable body."""
    parsed = email.message_from_bytes(self.latin_bin_msg)
    as_text = str(parsed)
    self.assertEqual(as_text, self.latin_bin_msg_as7bit)
3354
def test_decoded_generator_emits_unicode_body(self):
    """DecodedGenerator writes the 8bit body out as decoded text."""
    parsed = email.message_from_bytes(self.latin_bin_msg)
    sink = StringIO()
    email.generator.DecodedGenerator(sink).flatten(parsed)
    # The output carries one more newline than the input message.  RDM:
    # not sure if this is a bug or not, but it is not specific to the
    # 8bit->7bit conversion.
    expected = self.latin_bin_msg.decode('latin-1') + '\n'
    self.assertEqual(sink.getvalue(), expected)
3364
def test_bytes_feedparser(self):
    """BytesFeedParser accepts the message in arbitrary 10-byte pieces."""
    feeder = email.feedparser.BytesFeedParser()
    raw = self.latin_bin_msg
    for start in range(0, len(raw), 10):
        feeder.feed(raw[start:start + 10])
    parsed = feeder.close()
    self.assertEqual(str(parsed), self.latin_bin_msg_as7bit)
3371
def test_crlf_flatten(self):
    """Flattening msg_26.txt with CRLF line endings reproduces the file."""
    with openfile('msg_26.txt', 'rb') as fp:
        original = fp.read()
    parsed = email.message_from_bytes(original)
    sink = BytesIO()
    email.generator.BytesGenerator(sink).flatten(parsed, linesep='\r\n')
    self.assertEqual(sink.getvalue(), original)
R David Murrayc5c14722011-04-06 08:13:02 -04003380
def test_8bit_multipart(self):
    """A multipart message with 8bit parts round-trips through BytesGenerator.

    The source is parsed from bytes, flattened again, and the output is
    required to equal the input exactly.
    """
    # Issue 11605
    source = textwrap.dedent("""\
        Date: Fri, 18 Mar 2011 17:15:43 +0100
        To: foo@example.com
        From: foodwatch-Newsletter <bar@example.com>
        Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System
        Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain>
        MIME-Version: 1.0
        Content-Type: multipart/alternative;
         boundary="b1_76a486bee62b0d200f33dc2ca08220ad"

        --b1_76a486bee62b0d200f33dc2ca08220ad
        Content-Type: text/plain; charset="utf-8"
        Content-Transfer-Encoding: 8bit

        Guten Tag, ,

        mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die
        Nachrichten aus Japan.


        --b1_76a486bee62b0d200f33dc2ca08220ad
        Content-Type: text/html; charset="utf-8"
        Content-Transfer-Encoding: 8bit

        <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
        "http://www.w3.org/TR/html4/loose.dtd">
        <html lang="de">
        <head>
        <title>foodwatch - Newsletter</title>
        </head>
        <body>
        <p>mit gro&szlig;er Betroffenheit verfolgen auch wir im foodwatch-Team
        die Nachrichten aus Japan.</p>
        </body>
        </html>
        --b1_76a486bee62b0d200f33dc2ca08220ad--

        """).encode('utf-8')
    msg = email.message_from_bytes(source)
    s = BytesIO()
    g = email.generator.BytesGenerator(s)
    g.flatten(msg)
    self.assertEqual(s.getvalue(), source)
3426
# None lifts unittest's limit on the length of diffs shown on failure.
maxDiff = None
3428
Ezio Melottib3aedd42010-11-20 19:04:17 +00003429
class BaseTestBytesGeneratorIdempotent:
    """Mixin checking that BytesGenerator reproduces parsed test messages.

    Concrete subclasses supply ``linesep`` (str), ``blinesep`` (bytes) and
    ``normalize_linesep_regex`` (a compiled bytes pattern).
    """

    maxDiff = None

    def _msgobj(self, filename):
        """Return ``(message, data)`` for the named data file.

        The file's bytes are first rewritten so that whatever
        ``normalize_linesep_regex`` matches becomes ``blinesep``.
        """
        with openfile(filename, 'rb') as fp:
            raw = fp.read()
        normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(normalized), normalized

    def _idempotent(self, msg, data, unixfrom=False):
        """Flatten *msg* with ``linesep`` and compare the bytes to *data*."""
        sink = BytesIO()
        generator = email.generator.BytesGenerator(sink, maxheaderlen=0)
        generator.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertByteStringsEqual(data, sink.getvalue())

    def assertByteStringsEqual(self, str1, str2):
        """Compare two byte strings as lists of lines."""
        # Not using self.blinesep here is intentional.  This way the output
        # is more useful when the failure results in mixed line endings.
        first_lines = str1.split(b'\n')
        second_lines = str2.split(b'\n')
        self.assertListEqual(first_lines, second_lines)
3451
3452
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                     TestIdempotent):
    """Run the idempotency tests with bare ``\\n`` line endings."""

    # Every CRLF in the source data is rewritten to blinesep before parsing.
    normalize_linesep_regex = re.compile(br'\r\n')
    blinesep = b'\n'
    linesep = '\n'
3458
3459
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    """Run the idempotency tests with ``\\r\\n`` line endings."""

    # Every LF not already preceded by CR is rewritten to blinesep before
    # parsing.
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
    blinesep = b'\r\n'
    linesep = '\r\n'
3465
Ezio Melottib3aedd42010-11-20 19:04:17 +00003466
class TestBase64(unittest.TestCase):
    """Tests for the base64 helpers in ``email.base64mime``."""

    def test_len(self):
        """``header_length`` equals the length of the base64 encoded text."""
        eq = self.assertEqual
        eq(base64mime.header_length('hello'),
           len(base64mime.body_encode(b'hello', eol='')))
        for size in range(15):
            # Every started group of 3 input bytes yields 4 output chars.
            bsize = 4 * ((size + 2) // 3)
            eq(base64mime.header_length('x' * size), bsize)

    def test_decode(self):
        """``decode`` returns bytes, including for the empty string."""
        eq = self.assertEqual
        eq(base64mime.decode(''), b'')
        eq(base64mime.decode('aGVsbG8='), b'hello')

    def test_encode(self):
        """``body_encode`` wraps at *maxlinelen* and ends lines with *eol*."""
        eq = self.assertEqual
        # Empty input gives an empty result.  Only emptiness is pinned, not
        # whether the empty value is str or bytes.
        eq(len(base64mime.body_encode(b'')), 0)
        eq(base64mime.body_encode(b'hello'), 'aGVsbG8=\n')
        # A newline in the input is encoded like any other byte.
        eq(base64mime.body_encode(b'hello\n'), 'aGVsbG8K\n')
        full_line = 'eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg'
        last_line = 'eHh4eCB4eHh4IA=='
        # Test the maxlinelen arg
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40),
           (full_line + '\n') * 3 + last_line + '\n')
        # Test the eol argument
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           (full_line + '\r\n') * 3 + last_line + '\r\n')

    def test_header_encode(self):
        """``header_encode`` builds a base64 encoded word; default charset is iso-8859-1."""
        eq = self.assertEqual
        he = base64mime.header_encode
        eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        eq(he('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # Test the charset option
        eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003517
3518
Ezio Melottib3aedd42010-11-20 19:04:17 +00003519
class TestQuopri(unittest.TestCase):
    """Tests for the quoted-printable helpers in ``email.quoprimime``."""

    def setUp(self):
        # Set of characters (as byte integers) that don't need to be encoded
        # in headers.
        self.hlit = list(chain(
            range(ord('a'), ord('z') + 1),
            range(ord('A'), ord('Z') + 1),
            range(ord('0'), ord('9') + 1),
            b'!*+-/'))
        # Set of characters (as byte integers) that do need to be encoded in
        # headers.
        self.hnon = [c for c in range(256) if c not in self.hlit]
        # A real assertion rather than ``assert`` so the sanity check still
        # runs under ``python -O``.
        self.assertEqual(len(self.hlit) + len(self.hnon), 256)
        # Set of characters (as byte integers) that don't need to be encoded
        # in bodies.
        self.blit = list(range(ord(' '), ord('~') + 1))
        self.blit.append(ord('\t'))
        self.blit.remove(ord('='))
        # Set of characters (as byte integers) that do need to be encoded in
        # bodies.
        self.bnon = [c for c in range(256) if c not in self.blit]
        self.assertEqual(len(self.blit) + len(self.bnon), 256)

    def test_quopri_header_check(self):
        for c in self.hlit:
            self.assertFalse(quoprimime.header_check(c),
                        'Should not be header quopri encoded: %s' % chr(c))
        for c in self.hnon:
            self.assertTrue(quoprimime.header_check(c),
                            'Should be header quopri encoded: %s' % chr(c))

    def test_quopri_body_check(self):
        for c in self.blit:
            self.assertFalse(quoprimime.body_check(c),
                        'Should not be body quopri encoded: %s' % chr(c))
        for c in self.bnon:
            self.assertTrue(quoprimime.body_check(c),
                            'Should be body quopri encoded: %s' % chr(c))

    def test_header_quopri_len(self):
        eq = self.assertEqual
        eq(quoprimime.header_length(b'hello'), 5)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
           quoprimime.header_length(b'hello') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
           quoprimime.header_length(b'h@e@l@l@o@') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        for c in self.hlit:
            eq(quoprimime.header_length(bytes([c])), 1,
               'expected length 1 for %r' % chr(c))
        for c in self.hnon:
            # Space is special; it's encoded to _
            if c == ord(' '):
                continue
            eq(quoprimime.header_length(bytes([c])), 3,
               'expected length 3 for %r' % chr(c))
        eq(quoprimime.header_length(b' '), 1)

    def test_body_quopri_len(self):
        eq = self.assertEqual
        for c in self.blit:
            eq(quoprimime.body_length(bytes([c])), 1)
        for c in self.bnon:
            eq(quoprimime.body_length(bytes([c])), 3)

    def test_quote_unquote_idempotent(self):
        for x in range(256):
            c = chr(x)
            self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)

    def _test_header_encode(self, header, expected_encoded_header, charset=None):
        """Encode *header* (with *charset* if given) and compare the result."""
        if charset is None:
            encoded_header = quoprimime.header_encode(header)
        else:
            encoded_header = quoprimime.header_encode(header, charset)
        self.assertEqual(encoded_header, expected_encoded_header)

    def test_header_encode_null(self):
        self._test_header_encode(b'', '')

    def test_header_encode_one_word(self):
        self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=')

    def test_header_encode_two_lines(self):
        self._test_header_encode(b'hello\nworld',
                                '=?iso-8859-1?q?hello=0Aworld?=')

    def test_header_encode_non_ascii(self):
        self._test_header_encode(b'hello\xc7there',
                                '=?iso-8859-1?q?hello=C7there?=')

    def test_header_encode_alt_charset(self):
        self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
                charset='iso-8859-2')

    def _test_header_decode(self, encoded_header, expected_decoded_header):
        """Decode *encoded_header* and compare the result."""
        decoded_header = quoprimime.header_decode(encoded_header)
        self.assertEqual(decoded_header, expected_decoded_header)

    def test_header_decode_null(self):
        self._test_header_decode('', '')

    def test_header_decode_one_word(self):
        self._test_header_decode('hello', 'hello')

    def test_header_decode_two_lines(self):
        self._test_header_decode('hello=0Aworld', 'hello\nworld')

    def test_header_decode_non_ascii(self):
        self._test_header_decode('hello=C7there', 'hello\xc7there')

    def _test_decode(self, encoded, expected_decoded, eol=None):
        """Decode *encoded* (passing *eol* only when given) and compare."""
        if eol is None:
            decoded = quoprimime.decode(encoded)
        else:
            decoded = quoprimime.decode(encoded, eol=eol)
        self.assertEqual(decoded, expected_decoded)

    def test_decode_null_word(self):
        self._test_decode('', '')

    def test_decode_null_line_null_word(self):
        self._test_decode('\r\n', '\n')

    def test_decode_one_word(self):
        self._test_decode('hello', 'hello')

    def test_decode_one_word_eol(self):
        self._test_decode('hello', 'hello', eol='X')

    def test_decode_one_line(self):
        self._test_decode('hello\r\n', 'hello\n')

    def test_decode_one_line_lf(self):
        self._test_decode('hello\n', 'hello\n')

    def test_decode_one_line_cr(self):
        self._test_decode('hello\r', 'hello\n')

    def test_decode_one_line_nl(self):
        self._test_decode('hello\n', 'helloX', eol='X')

    def test_decode_one_line_crnl(self):
        self._test_decode('hello\r\n', 'helloX', eol='X')

    def test_decode_one_line_one_word(self):
        self._test_decode('hello\r\nworld', 'hello\nworld')

    def test_decode_one_line_one_word_eol(self):
        self._test_decode('hello\r\nworld', 'helloXworld', eol='X')

    def test_decode_two_lines(self):
        self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n')

    def test_decode_two_lines_eol(self):
        self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X')

    def test_decode_one_long_line(self):
        self._test_decode('Spam' * 250, 'Spam' * 250)

    def test_decode_one_space(self):
        self._test_decode(' ', '')

    def test_decode_multiple_spaces(self):
        self._test_decode(' ' * 5, '')

    def test_decode_one_line_trailing_spaces(self):
        self._test_decode('hello \r\n', 'hello\n')

    def test_decode_two_lines_trailing_spaces(self):
        self._test_decode('hello \r\nworld \r\n', 'hello\nworld\n')

    def test_decode_quoted_word(self):
        self._test_decode('=22quoted=20words=22', '"quoted words"')

    def test_decode_uppercase_quoting(self):
        self._test_decode('ab=CD=EF', 'ab\xcd\xef')

    def test_decode_lowercase_quoting(self):
        self._test_decode('ab=cd=ef', 'ab\xcd\xef')

    def test_decode_soft_line_break(self):
        self._test_decode('soft line=\r\nbreak', 'soft linebreak')

    def test_decode_false_quoting(self):
        self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2')

    def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
        """Encode *body* and compare; also check the line-length limit.

        *maxlinelen* and *eol* are passed to ``body_encode`` only when
        given, so that its own defaults are exercised otherwise.
        """
        kwargs = {}
        if maxlinelen is None:
            # Use body_encode's default.
            maxlinelen = 76
        else:
            kwargs['maxlinelen'] = maxlinelen
        if eol is None:
            # Use body_encode's default.
            eol = '\n'
        else:
            kwargs['eol'] = eol
        encoded_body = quoprimime.body_encode(body, **kwargs)
        self.assertEqual(encoded_body, expected_encoded_body)
        if eol == '\n' or eol == '\r\n':
            # We know how to split the result back into lines, so maxlinelen
            # can be checked.
            for line in encoded_body.splitlines():
                self.assertLessEqual(len(line), maxlinelen)

    def test_encode_null(self):
        self._test_encode('', '')

    def test_encode_null_lines(self):
        self._test_encode('\n\n', '\n\n')

    def test_encode_one_line(self):
        self._test_encode('hello\n', 'hello\n')

    def test_encode_one_line_crlf(self):
        self._test_encode('hello\r\n', 'hello\n')

    def test_encode_one_line_eol(self):
        self._test_encode('hello\n', 'hello\r\n', eol='\r\n')

    def test_encode_one_space(self):
        self._test_encode(' ', '=20')

    def test_encode_one_line_one_space(self):
        self._test_encode(' \n', '=20\n')

    # XXX: body_encode() expect strings, but uses ord(char) from these strings
    # to index into a 256-entry list.  For code points above 255, this will
    # fail.  Should there be a check for 8-bit only ord() values in body, or
    # at least a comment about the expected input?

    def test_encode_two_lines_one_space(self):
        self._test_encode(' \n \n', '=20\n=20\n')

    def test_encode_one_word_trailing_spaces(self):
        # Only the last of the trailing spaces is encoded.
        self._test_encode('hello' + ' ' * 2, 'hello' + ' ' + '=20')

    def test_encode_one_line_trailing_spaces(self):
        # Only the last space before the line end is encoded.
        self._test_encode('hello' + ' ' * 2 + '\n', 'hello' + ' ' + '=20\n')

    def test_encode_one_word_trailing_tab(self):
        self._test_encode('hello \t', 'hello =09')

    def test_encode_one_line_trailing_tab(self):
        self._test_encode('hello \t\n', 'hello =09\n')

    def test_encode_trailing_space_before_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6)

    def test_encode_trailing_space_at_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5)

    def test_encode_trailing_space_beyond_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4)

    def test_encode_whitespace_lines(self):
        self._test_encode(' \n' * 5, '=20\n' * 5)

    def test_encode_quoted_equals(self):
        self._test_encode('a = b', 'a =3D b')

    def test_encode_one_long_string(self):
        self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25)

    def test_encode_one_long_line(self):
        self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')

    def test_encode_one_very_long_line(self):
        self._test_encode('x' * 200 + '\n',
                2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n')

    def test_encode_shortest_maxlinelen(self):
        self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4)

    def test_encode_maxlinelen_too_small(self):
        self.assertRaises(ValueError, self._test_encode, '', '', maxlinelen=3)

    def test_encode(self):
        eq = self.assertEqual
        eq(quoprimime.body_encode(''), '')
        eq(quoprimime.body_encode('hello'), 'hello')
        # A CRLF in the input comes out as the default eol.
        eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
        # Expected soft-wrapped lines; the second begins with a space and the
        # final trailing space is encoded as =20.
        wrapped = ('xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=',
                   ' xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=',
                   'x xxxx xxxx xxxx xxxx=20')
        # Test the maxlinelen arg
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40),
           '\n'.join(wrapped))
        # Test the eol argument
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           '\r\n'.join(wrapped))
        eq(quoprimime.body_encode('one line\n\ntwo line'),
           'one line\n\ntwo line')
3832
3833
Ezio Melottib3aedd42010-11-20 19:04:17 +00003834
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003835# Test the Charset class
class TestCharset(unittest.TestCase):
    """Tests for ``email.charset.Charset``."""

    def tearDown(self):
        # Drop the 'fake' charset that test_body_encode registers, if any.
        from email import charset as CharsetModule
        CharsetModule.CHARSETS.pop('fake', None)

    def test_codec_encodeable(self):
        """Header encoding follows the charset's codec."""
        eq = self.assertEqual
        # Make sure us-ascii = no Unicode conversion
        ascii_charset = Charset('us-ascii')
        eq(ascii_charset.header_encode('Hello World!'), 'Hello World!')
        # Test 8-bit idempotency with us-ascii
        eight_bit = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
        self.assertRaises(UnicodeError, ascii_charset.header_encode, eight_bit)
        utf8_charset = Charset('utf-8')
        eq(utf8_charset.header_encode(eight_bit),
           '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')

    def test_body_encode(self):
        """Body encoding follows the charset's configured body encoding."""
        eq = self.assertEqual
        # Try a charset with QP body encoding
        qp_charset = Charset('iso-8859-1')
        eq('hello w=F6rld', qp_charset.body_encode('hello w\xf6rld'))
        # Try a charset with Base64 body encoding
        b64_charset = Charset('utf-8')
        eq('aGVsbG8gd29ybGQ=\n', b64_charset.body_encode(b'hello world'))
        # Try a charset with None body encoding
        plain_charset = Charset('us-ascii')
        eq('hello world', plain_charset.body_encode('hello world'))
        # Try the convert argument, where input codec != output codec
        c = Charset('euc-jp')
        # With apologies to Tokio Kikuchi ;)
        # XXX FIXME
##         try:
##             eq('\x1b$B5FCO;~IW\x1b(B',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
##             eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
##         except LookupError:
##             # We probably don't have the Japanese codecs installed
##             pass
        # Testing SF bug #625509, which we have to fake, since there are no
        # built-in encodings where the header encoding is QP but the body
        # encoding is not.
        from email import charset as CharsetModule
        CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8')
        fake_charset = Charset('fake')
        eq('hello world', fake_charset.body_encode('hello world'))

    def test_unicode_charset_name(self):
        """A non-ASCII charset name is rejected with CharsetError."""
        ascii_charset = Charset('us-ascii')
        self.assertEqual(str(ascii_charset), 'us-ascii')
        self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
3890
3891
Ezio Melottib3aedd42010-11-20 19:04:17 +00003892
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003893# Test multilingual MIME headers.
class TestHeader(TestEmailBase):
    # Covers email.header.Header: construction, append(), encode() line
    # splitting, and the decode_header()/make_header() round trip.

    def test_simple(self):
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        # The appended chunk carries its own leading space.
        h.append(' Goodbye World!')
        eq(h.encode(), 'Hello World! Goodbye World!')

    def test_simple_surprise(self):
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        # No leading space on the appended chunk, yet the encoded result is
        # expected to be the same as in test_simple.
        h.append('Goodbye World!')
        eq(h.encode(), 'Hello World! Goodbye World!')

    def test_header_needs_no_decoding(self):
        h = 'no decoding needed'
        self.assertEqual(decode_header(h), [(h, None)])

    def test_long(self):
        h = Header(
            "I am the very model of a modern Major-General; "
            "I've information vegetable, animal, and mineral; "
            "I know the kings of England, and I quote the fights historical "
            "from Marathon to Waterloo, in order categorical; "
            "I'm very well acquainted, too, with matters mathematical; "
            "I understand equations, both the simple and quadratical; "
            "about binomial theorem I'm teeming with a lot o' news, "
            "with many cheerful facts about the square of the hypotenuse.",
            maxlinelen=76)
        # Splitting on '\n ' drops the continuation-line space, so the length
        # checked is that of the text on each folded line.
        for line in h.encode(splitchars=' ').split('\n '):
            # assertLessEqual reports the offending length on failure,
            # unlike assertTrue(len(...) <= 76).
            self.assertLessEqual(len(line), 76)

    def test_multilingual(self):
        eq = self.ndiffAssertEqual
        g = Charset("iso-8859-1")
        cz = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        g_head = (b'Die Mieter treten hier ein werden mit einem '
                  b'Foerderband komfortabel den Korridor entlang, '
                  b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
                  b'gegen die rotierenden Klingen bef\xf6rdert. ')
        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        # One header built from three chunks, each in its own charset.
        h = Header(g_head, g)
        h.append(cz_head, cz)
        h.append(utf8_head, utf8)
        enc = h.encode(maxlinelen=76)
        eq(enc, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
        # Decoding must give back the three original chunks and charsets.
        decoded = decode_header(enc)
        eq(len(decoded), 3)
        eq(decoded[0], (g_head, 'iso-8859-1'))
        eq(decoded[1], (cz_head, 'iso-8859-2'))
        eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
        # str() of the header is the full text as a single unicode string.
        ustr = str(h)
        eq(ustr,
           (b'Die Mieter treten hier ein werden mit einem Foerderband '
            b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
            b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
            b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
            b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
            b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
            b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
            b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
            b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
            b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
            b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
            b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
            b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
            b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
            b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
            b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
            ).decode('utf-8'))
        # Test make_header()
        newh = make_header(decode_header(enc))
        eq(newh, h)

    def test_empty_header_encode(self):
        h = Header()
        self.assertEqual(h.encode(), '')

    def test_header_ctor_default_args(self):
        eq = self.ndiffAssertEqual
        # A Header built with no arguments compares equal to ''.
        h = Header()
        eq(h, '')
        h.append('foo', Charset('iso-8859-1'))
        eq(h, 'foo')

    def test_explicit_maxlinelen(self):
        eq = self.ndiffAssertEqual
        hstr = ('A very long line that must get split to something other '
                'than at the 76th character boundary to test the non-default '
                'behavior')
        # Default settings.
        h = Header(hstr)
        eq(h.encode(), '''\
A very long line that must get split to something other than at the 76th
 character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        # Supplying header_name moves the expected fold point earlier.
        h = Header(hstr, header_name='Subject')
        eq(h.encode(), '''\
A very long line that must get split to something other than at the
 76th character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        # With a large explicit maxlinelen no folding is expected.
        h = Header(hstr, maxlinelen=1024, header_name='Subject')
        eq(h.encode(), hstr)
        eq(str(h), hstr)

    def test_quopri_splittable(self):
        eq = self.ndiffAssertEqual
        h = Header(charset='iso-8859-1', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_?=""")
        # The folded form must decode back to the original text.
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='iso-8859-1', maxlinelen=40)
        h.append('xxxx ' * 20)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
 =?iso-8859-1?q?_xxxx_xxxx_?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_base64_splittable(self):
        eq = self.ndiffAssertEqual
        h = Header(charset='koi8-r', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IA==?=""")
        # The folded form must decode back to the original text.
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='koi8-r', maxlinelen=40)
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_us_ascii_header(self):
        eq = self.assertEqual
        s = 'hello'
        x = decode_header(s)
        eq(x, [('hello', None)])
        h = make_header(x)
        eq(s, h.encode())

    def test_string_charset(self):
        # The charset may be given to append() as a plain string name.
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        eq(h, 'hello')

##    def test_unicode_error(self):
##        raises = self.assertRaises
##        raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
##        h = Header()
##        raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')

    def test_utf8_shortest(self):
        eq = self.assertEqual
        # Mostly-ASCII text is expected in the 'q' form ...
        h = Header('p\xf6stal', 'utf-8')
        eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
        # ... while all-non-ASCII text is expected in the 'b' form.
        h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
        eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')

    def test_bad_8bit_header(self):
        raises = self.assertRaises
        eq = self.assertEqual
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        # Undecodable bytes are an error by default ...
        raises(UnicodeError, Header, x)
        h = Header()
        raises(UnicodeError, h.append, x)
        # ... but are accepted when errors='replace' is requested.
        e = x.decode('utf-8', 'replace')
        eq(str(Header(x, errors='replace')), e)
        h.append(x, errors='replace')
        eq(str(h), e)

    def test_escaped_8bit_header(self):
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        # Smuggle the stray byte into a str via surrogateescape.
        x = x.decode('ascii', 'surrogateescape')
        h = Header(x, charset=email.charset.UNKNOWN8BIT)
        self.assertEqual(str(h),
                        'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])

    def test_modify_returned_list_does_not_change_header(self):
        h = Header('test')
        chunks = email.header.decode_header(h)
        # Mutating the returned list must not leak back into the Header.
        chunks.append(('ascii', 'test2'))
        self.assertEqual(str(h), 'test')

    def test_encoded_adjacent_nonencoded(self):
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        h.append('world')
        s = h.encode()
        eq(s, '=?iso-8859-1?q?hello?= world')
        # The encoded form must survive a decode/re-encode round trip.
        h = make_header(decode_header(s))
        eq(h.encode(), s)

    def test_whitespace_eater(self):
        eq = self.assertEqual
        s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
        parts = decode_header(s)
        # The two adjacent koi8-r encoded words are expected to come back as
        # a single chunk.
        eq(parts, [(b'Subject:', None),
                   (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 '
                    b'\xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'),
                   (b'zz.', None)])
        hdr = make_header(parts)
        eq(hdr.encode(),
           'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')

    def test_broken_base64_header(self):
        raises = self.assertRaises
        s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
        raises(errors.HeaderParseError, decode_header, s)

    def test_shift_jis_charset(self):
        # A shift_jis header is expected to be emitted as iso-2022-jp.
        h = Header('文', charset='shift_jis')
        self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')

    def test_flatten_header_with_no_value(self):
        # Issue 11401 (regression from email 4.x)  Note that the space after
        # the header doesn't reflect the input, but this is also the way
        # email 4.x behaved.  At some point it would be nice to fix that.
        msg = email.message_from_string("EmptyHeader:")
        self.assertEqual(str(msg), "EmptyHeader: \n\n")

    def test_encode_preserves_leading_ws_on_value(self):
        # NOTE(review): the value and the expected output must agree on the
        # exact run of leading whitespace -- confirm against the upstream
        # file, since this copy may have had whitespace collapsed.
        msg = Message()
        msg['SomeHeader'] = ' value with leading ws'
        self.assertEqual(str(msg), "SomeHeader: value with leading ws\n\n")
4230
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004231
Ezio Melottib3aedd42010-11-20 19:04:17 +00004232
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004233# Test RFC 2231 header parameters (en/de)coding
class TestRFC2231(TestEmailBase):
    # Covers RFC 2231 parameter handling on Message: get_param(), set_param(),
    # del_param(), get_filename(), get_boundary() and get_content_charset().

    def test_get_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_29.txt')
        eq(msg.get_param('title'),
           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        # With unquote=False the surrounding double quotes are kept.
        eq(msg.get_param('title', unquote=False),
           ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))

    def test_set_param(self):
        eq = self.ndiffAssertEqual
        msg = Message()
        # Charset only: the language slot is expected to be ''.
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii')
        eq(msg.get_param('title'),
           ('us-ascii', '', 'This is even more ***fun*** isn\'t it!'))
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        eq(msg.get_param('title'),
           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        msg = self._msgobj('msg_01.txt')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        # NOTE(review): the expected text must match msg_01.txt byte for byte;
        # verify runs of whitespace (e.g. after "Fri,") against the data file.
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    def test_set_param_requote(self):
        msg = Message()
        msg.set_param('title', 'foo')
        self.assertEqual(msg['content-type'], 'text/plain; title="foo"')
        msg.set_param('title', 'bar', requote=False)
        self.assertEqual(msg['content-type'], 'text/plain; title=bar')
        # tspecial is still quoted.
        msg.set_param('title', "(bar)bell", requote=False)
        self.assertEqual(msg['content-type'], 'text/plain; title="(bar)bell"')

    def test_del_param(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        msg.set_param('foo', 'bar', charset='us-ascii', language='en')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        # Only 'foo' is removed; 'title' must still be in the output.
        msg.del_param('foo', header='Content-Type')
        # NOTE(review): the expected text must match msg_01.txt byte for byte;
        # verify runs of whitespace (e.g. after "Fri,") against the data file.
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    def test_rfc2231_get_content_charset(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_32.txt')
        eq(msg.get_content_charset(), 'us-ascii')

    def test_rfc2231_parse_rfc_quoting(self):
        # Encoded segments written without surrounding double quotes.
        m = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*=''This%20is%20even%20more%20;
            \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        # Flattening must reproduce the input exactly.
        self.assertEqual(m, msg.as_string())

    def test_rfc2231_parse_extra_quoting(self):
        # Same as above, but the encoded segments are double-quoted too.
        m = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*="''This%20is%20even%20more%20";
            \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        # Flattening must reproduce the input exactly.
        self.assertEqual(m, msg.as_string())

    def test_rfc2231_no_language_or_charset(self):
        m = '''\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm

'''
        msg = email.message_from_string(m)
        param = msg.get_param('NAME')
        # Unencoded continuations yield a plain string, not a 3-tuple.
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(
            param,
            'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')

    def test_rfc2231_no_language_or_charset_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
        # NOTE(review): input and expectation are identical to
        # test_rfc2231_no_language_or_charset_in_filename -- confirm whether
        # a different input was intended here.
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_partly_encoded(self):
        # Segment 0 lacks the trailing '*', so its %XX text is expected to
        # come through undecoded while segment 1 is decoded.
        m = '''\
Content-Disposition: inline;
\tfilename*0="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20***fun*** is it not.pdf')

    def test_rfc2231_partly_nonencoded(self):
        # No segment is marked as encoded, so nothing is percent-decoded.
        m = '''\
Content-Disposition: inline;
\tfilename*0="This%20is%20even%20more%20";
\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_boundary(self):
        m = '''\
Content-Type: multipart/alternative;
\tboundary*0*="''This%20is%20even%20more%20";
\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tboundary*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_boundary(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_charset(self):
        # This is a nonsensical charset value, but tests the code anyway
        m = '''\
Content-Type: text/plain;
\tcharset*0*="This%20is%20even%20more%20";
\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tcharset*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        # The expected value is lower-cased.
        self.assertEqual(msg.get_content_charset(),
                         'this is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_filename(self):
        # 'bogus' is not a real charset; the filename is still expected back.
        m = '''\
Content-Disposition: inline;
\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_charset(self):
        m = """\
Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D

"""
        msg = email.message_from_string(m)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertIsNone(msg.get_content_charset())

    def test_rfc2231_bad_character_in_charset(self):
        m = """\
Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D

"""
        msg = email.message_from_string(m)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertIsNone(msg.get_content_charset())

    def test_rfc2231_bad_character_in_filename(self):
        # The stray %E2 byte is expected to surface as U+FFFD.
        m = '''\
Content-Disposition: inline;
\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2*="is it not.pdf%E2"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf\ufffd')

    def test_rfc2231_unknown_encoding(self):
        m = """\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt

"""
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(), 'myfile.txt')

    def test_rfc2231_single_tick_in_filename_extended(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(m)
        # A lone apostrophe must not be taken for a charset/language
        # delimiter: both are expected to be None.
        charset, language, s = msg.get_param('name')
        eq(charset, None)
        eq(language, None)
        eq(s, "Frank's Document")

    def test_rfc2231_single_tick_in_filename(self):
        m = """\
Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(m)
        param = msg.get_param('name')
        # Unencoded continuations yield a plain string, not a 3-tuple.
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(param, "Frank's Document")

    def test_rfc2231_tick_attack_extended(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(m)
        # Only the first two apostrophes delimit charset and language; the
        # third belongs to the value.
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, "Frank's Document")

    def test_rfc2231_tick_attack(self):
        m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(m)
        param = msg.get_param('name')
        # Segments are not marked as encoded, so the whole text, apostrophes
        # included, is expected back as a plain string.
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(param, "us-ascii'en-us'Frank's Document")

    def test_rfc2231_no_extended_values(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo; name=\"Frank's Document\"

"""
        msg = email.message_from_string(m)
        eq(msg.get_param('name'), "Frank's Document")

    def test_rfc2231_encoded_then_unencoded_segments(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'My\";
\tname*1=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, 'My Document For You')

    def test_rfc2231_unencoded_then_encoded_segments(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'My\";
\tname*1*=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, 'My Document For You')
4578
4579
Ezio Melottib3aedd42010-11-20 19:04:17 +00004580
R. David Murraya8f480f2010-01-16 18:30:03 +00004581# Tests to ensure that signed parts of an email are completely preserved, as
4582# required by RFC1847 section 2.1. Note that these are incomplete, because the
4583# email package does not currently always preserve the body. See issue 1670765.
class TestSigned(TestEmailBase):
    # Each test parses msg_45.txt, regenerates it, and checks that the first
    # MIME part comes out unchanged.

    # Captures (group 2) the text between a '--<boundary>' line and the next
    # line consisting of the same '--<boundary>'.  Compiled once for the
    # class rather than on every comparison.
    _FIRST_PART_RE = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)

    def _msg_and_obj(self, filename):
        # Return (raw text of the data file, Message parsed from that text).
        with openfile(findfile(filename)) as fp:
            original = fp.read()
        msg = email.message_from_string(original)
        return original, msg

    def _first_part(self, text, label):
        # Return the first MIME part of `text`.  A missing part is reported
        # as a test failure naming `label`, not as an AttributeError on None.
        match = self._FIRST_PART_RE.search(text)
        self.assertIsNotNone(match,
                             'no MIME part found in %s message' % label)
        return match.group(2)

    def _signed_parts_eq(self, original, result):
        # Extract the first mime part of each message
        inpart = self._first_part(original, 'original')
        outpart = self._first_part(result, 'generated')
        self.assertEqual(outpart, inpart)

    def test_long_headers_as_string(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        # Same check with an explicit header length limit.
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        # Same check going through Generator.flatten() directly.
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)
4616
4617
Ezio Melottib3aedd42010-11-20 19:04:17 +00004618
def _testclasses():
    """Return every attribute of this module whose name starts with 'Test'.

    The objects are returned in the order produced by dir() on the module.
    """
    this_module = sys.modules[__name__]
    matching = (attr for attr in dir(this_module) if attr.startswith('Test'))
    return [getattr(this_module, attr) for attr in matching]
4622
4623
def suite():
    """Return a TestSuite holding the tests of every class from _testclasses().

    Tests are loaded with TestLoader.loadTestsFromTestCase; the previously
    used unittest.makeSuite() helper is deprecated and no longer available
    in current Python versions.
    """
    loader = unittest.TestLoader()
    all_tests = unittest.TestSuite()
    for testclass in _testclasses():
        all_tests.addTest(loader.loadTestsFromTestCase(testclass))
    return all_tests
4629
4630
def test_main():
    """Pass each class returned by _testclasses() to run_unittest, one at a time."""
    for case_class in _testclasses():
        run_unittest(case_class)
4634
4635
Ezio Melottib3aedd42010-11-20 19:04:17 +00004636
# When executed as a script, run the tests built by suite() above.
if __name__ == '__main__':
    unittest.main(defaultTest='suite')