blob: b4dc575ae673e77945bef6ba2e0a90b789a3c5a8 [file] [log] [blame]
Benjamin Peterson46a99002010-01-09 18:45:30 +00001# Copyright (C) 2001-2010 Python Software Foundation
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002# Contact: email-sig@python.org
3# email package unit tests
4
5import os
R. David Murray719a4492010-11-21 16:53:48 +00006import re
Guido van Rossum8b3febe2007-08-30 01:15:14 +00007import sys
8import time
9import base64
10import difflib
11import unittest
12import warnings
R. David Murray96fd54e2010-10-08 15:55:28 +000013import textwrap
Guido van Rossum8b3febe2007-08-30 01:15:14 +000014
R. David Murray96fd54e2010-10-08 15:55:28 +000015from io import StringIO, BytesIO
Guido van Rossum8b3febe2007-08-30 01:15:14 +000016from itertools import chain
17
18import email
19
20from email.charset import Charset
21from email.header import Header, decode_header, make_header
22from email.parser import Parser, HeaderParser
23from email.generator import Generator, DecodedGenerator
24from email.message import Message
25from email.mime.application import MIMEApplication
26from email.mime.audio import MIMEAudio
27from email.mime.text import MIMEText
28from email.mime.image import MIMEImage
29from email.mime.base import MIMEBase
30from email.mime.message import MIMEMessage
31from email.mime.multipart import MIMEMultipart
32from email import utils
33from email import errors
34from email import encoders
35from email import iterators
36from email import base64mime
37from email import quoprimime
38
R. David Murray96fd54e2010-10-08 15:55:28 +000039from test.support import findfile, run_unittest, unlink
Guido van Rossum8b3febe2007-08-30 01:15:14 +000040from email.test import __file__ as landmark
41
42
43NL = '\n'
44EMPTYSTRING = ''
45SPACE = ' '
46
47
Ezio Melottib3aedd42010-11-20 19:04:17 +000048
def openfile(filename, *args, **kws):
    """Open *filename* from the ``data`` directory next to ``landmark``.

    Any extra positional or keyword arguments are handed to the built-in
    ``open`` unchanged; the open file object is returned to the caller.
    """
    data_dir = os.path.join(os.path.dirname(landmark), 'data')
    return open(os.path.join(data_dir, filename), *args, **kws)
52
53
Ezio Melottib3aedd42010-11-20 19:04:17 +000054
Guido van Rossum8b3febe2007-08-30 01:15:14 +000055# Base test class
class TestEmailBase(unittest.TestCase):
    """Common helpers shared by the email test case classes."""

    def ndiffAssertEqual(self, first, second):
        """Like assertEqual except use ndiff for readable output.

        When the two values differ, both are converted with ``str`` and
        split into lines; the ``repr`` of every line is then compared with
        ``difflib.ndiff`` and the resulting diff is raised as the failure
        message.  Nothing happens when the values compare equal.
        """
        if first != second:
            rfirst = [repr(line) for line in str(first).splitlines()]
            rsecond = [repr(line) for line in str(second).splitlines()]
            diff = difflib.ndiff(rfirst, rsecond)
            # Start with a newline so the diff begins on a line of its own.
            raise self.failureException('\n' + '\n'.join(diff))

    def _msgobj(self, filename):
        """Parse the data file *filename* and return the message object.

        The name is passed straight to openfile(), which already resolves
        it inside the package's data directory.  It is deliberately not
        routed through findfile(): that helper may return a same-named
        file found elsewhere on sys.path, which would bypass the data
        directory.
        """
        with openfile(filename) as fp:
            return email.message_from_file(fp)
70
71
Ezio Melottib3aedd42010-11-20 19:04:17 +000072
Guido van Rossum8b3febe2007-08-30 01:15:14 +000073# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    """Tests for the header, parameter and payload API of ``Message``.

    Tests that call ``self._msgobj`` or ``openfile`` read ``msg_NN.txt``
    data files; the remaining tests build their messages inline.  Test
    methods are documented with comments rather than docstrings so that
    the names reported by the test runner stay unchanged.
    """

    def test_get_all(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        # 'xx' is expected to fall back to the supplied default value.
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        eq(msg.get_charset(), None)
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        eq(msg.get_charset(), None)
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        # set_charset() is given a plain string here, not a Charset.
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_get_charsets(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        # With a failobj, 'dingbat' is expected where None appeared above.
        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_make_boundary(self):
        msg = MIMEMultipart('form-data')
        # Note that when the boundary gets created is an implementation
        # detail and might change.
        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
        # Trigger creation of boundary
        msg.as_string()
        self.assertEqual(msg.items()[0][1][:33],
                         'multipart/form-data; boundary="==')
        # XXX: there ought to be tests of the uniqueness of the boundary, too.

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        # openfile() already resolves the name inside the data directory,
        # so the name is passed to it directly (as the other tests do).
        with openfile('msg_46.txt') as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, True, 0)
        gen.flatten(msg, False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        eq(msg.get_payload(decode=True), None)
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            # Assigning a header appends to the message and lookups return
            # the first match, so remove the previous value first;
            # otherwise only 'x-uuencode' would ever be exercised.
            del msg['content-transfer-encoding']
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_decoded_generator(self):
        # Flattening msg_07.txt with DecodedGenerator is compared against
        # the text stored in msg_17.txt.
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        self.assertIn('from', msg)
        self.assertIn('From', msg)
        self.assertIn('FROM', msg)
        self.assertIn('to', msg)
        self.assertIn('To', msg)
        self.assertIn('TO', msg)

    def test_as_string(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        eq(text, str(msg))
        # With unixfrom=True the first line must be the envelope header;
        # the remaining lines must still equal the original text.
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        self.assertTrue(lines[0].startswith('From '))
        eq(text, NL.join(lines[1:]))

    def test_bad_param(self):
        # 'baz' is given without a value; '' is expected back.
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_filename(), None)

    def test_bogus_filename(self):
        # 'filename' is present but has no value; '' is expected back.
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_boundary(), None)

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        # No header= argument: the message has no Content-Type header.
        eq(msg.get_params(), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    def test_get_param_liberal(self):
        # The header value repeats the field name and has spaces around
        # '='; the boundary is still expected to be found.
        msg = Message()
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        eq(msg.get_param('quuz', header='x-header'), None)
        eq(msg.get_param('quuz'), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    def test_get_param_with_semis_in_quotes(self):
        # The quoted value itself contains semicolons.
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    def test_get_param_with_quotes(self):
        # Both literals below spell the same header text; only the Python
        # quoting style differs.
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        msg = email.message_from_string('Header: exists')
        # Membership is checked in three different capitalisations.
        self.assertIn('header', msg)
        self.assertIn('Header', msg)
        self.assertIn('HEADER', msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        # Setting it again is expected to put the parameter at the end.
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        # A type without a '/' must be rejected.
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # Changing the type is expected to keep the existing parameter.
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        # A Content-Type value without a '/' is expected to yield 'text'.
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        # A Content-Type value without a '/' is expected to yield 'plain'.
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        # With a duplicated name only the first occurrence is expected
        # to be replaced.
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    def test_broken_base64_payload(self):
        # The payload is not valid base64; the undecoded text is expected
        # back as bytes.
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    # Issue 1078919
    def test_ascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    def test_noascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer.ppt")
        self.assertEqual(
            'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
            msg['Content-Disposition'])

    def test_nonascii_add_header_via_triple(self):
        # The value is passed as a (charset, language, value) triple.
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fußballer.ppt'))
        self.assertEqual(
            'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
            msg['Content-Disposition'])

    def test_ascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="windows [filename].ppt")
        self.assertEqual(
            'attachment; filename="windows [filename].ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer [filename].ppt")
        self.assertEqual(
            "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
            msg['Content-Disposition'])

    # Issue 5871: reject an attempt to embed a header inside a header value
    # (header injection attack).
    def test_embeded_header_via_Header_rejected(self):
        msg = Message()
        msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_embeded_header_via_string_rejected(self):
        msg = Message()
        msg['Dummy'] = 'dummy\nX-Injected-Header: test'
        self.assertRaises(errors.HeaderParseError, msg.as_string)
575
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000576# Test the email.encoders module
class TestEncoders(unittest.TestCase):
    """Checks on the Content-Transfer-Encoding chosen for message bodies."""

    def test_EncodersEncode_base64(self):
        with openfile('PyBanner048.gif', 'rb') as fp:
            image_bytes = fp.read()
        encoded_body = MIMEImage(image_bytes).get_payload()
        # the transfer-encoded body lines should all be <=76 characters
        longest = max(len(line) for line in encoded_body.split('\n'))
        self.assertLessEqual(longest, 76)

    def test_encode_empty_payload(self):
        # No payload has been set when the charset is applied.
        msg = Message()
        msg.set_charset('us-ascii')
        self.assertEqual(msg['content-transfer-encoding'], '7bit')

    def test_default_cte(self):
        # 7bit data and the default us-ascii _charset
        plain = MIMEText('hello world')
        self.assertEqual(plain['content-transfer-encoding'], '7bit')
        # Similar, but with 8bit data
        eight_bit = MIMEText('hello \xf8 world')
        self.assertEqual(eight_bit['content-transfer-encoding'], '8bit')
        # And now with a different charset
        latin1 = MIMEText('hello \xf8 world', _charset='iso-8859-1')
        self.assertEqual(latin1['content-transfer-encoding'],
                         'quoted-printable')

    def test_encode7or8bit(self):
        # Make sure a charset whose input character set is 8bit but
        # whose output character set is 7bit gets a transfer-encoding
        # of 7bit.
        msg = MIMEText('文', _charset='euc-jp')
        self.assertEqual(msg['content-transfer-encoding'], '7bit')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000613
Ezio Melottib3aedd42010-11-20 19:04:17 +0000614
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000615# Test long header wrapping
616class TestLongHeaders(TestEmailBase):
617 def test_split_long_continuation(self):
618 eq = self.ndiffAssertEqual
619 msg = email.message_from_string("""\
620Subject: bug demonstration
621\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
622\tmore text
623
624test
625""")
626 sfp = StringIO()
627 g = Generator(sfp)
628 g.flatten(msg)
629 eq(sfp.getvalue(), """\
630Subject: bug demonstration
631\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
632\tmore text
633
634test
635""")
636
637 def test_another_long_almost_unsplittable_header(self):
638 eq = self.ndiffAssertEqual
639 hstr = """\
640bug demonstration
641\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
642\tmore text"""
643 h = Header(hstr, continuation_ws='\t')
644 eq(h.encode(), """\
645bug demonstration
646\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
647\tmore text""")
648 h = Header(hstr.replace('\t', ' '))
649 eq(h.encode(), """\
650bug demonstration
651 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
652 more text""")
653
654 def test_long_nonstring(self):
655 eq = self.ndiffAssertEqual
656 g = Charset("iso-8859-1")
657 cz = Charset("iso-8859-2")
658 utf8 = Charset("utf-8")
659 g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
660 b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
661 b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
662 b'bef\xf6rdert. ')
663 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
664 b'd\xf9vtipu.. ')
665 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
666 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
667 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
668 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
669 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
670 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
671 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
672 '\u3044\u307e\u3059\u3002')
673 h = Header(g_head, g, header_name='Subject')
674 h.append(cz_head, cz)
675 h.append(utf8_head, utf8)
676 msg = Message()
677 msg['Subject'] = h
678 sfp = StringIO()
679 g = Generator(sfp)
680 g.flatten(msg)
681 eq(sfp.getvalue(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000682Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
683 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
684 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
685 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
686 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
687 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
688 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
689 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
690 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
691 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
692 =?utf-8?b?44CC?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000693
694""")
Guido van Rossum9604e662007-08-30 03:46:43 +0000695 eq(h.encode(maxlinelen=76), """\
696=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
697 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
698 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
699 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
700 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
701 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
702 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
703 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
704 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
705 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
706 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000707
708 def test_long_header_encode(self):
709 eq = self.ndiffAssertEqual
710 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
711 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
712 header_name='X-Foobar-Spoink-Defrobnit')
713 eq(h.encode(), '''\
714wasnipoop; giraffes="very-long-necked-animals";
715 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
716
717 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
718 eq = self.ndiffAssertEqual
719 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
720 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
721 header_name='X-Foobar-Spoink-Defrobnit',
722 continuation_ws='\t')
723 eq(h.encode(), '''\
724wasnipoop; giraffes="very-long-necked-animals";
725 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
726
727 def test_long_header_encode_with_tab_continuation(self):
728 eq = self.ndiffAssertEqual
729 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
730 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
731 header_name='X-Foobar-Spoink-Defrobnit',
732 continuation_ws='\t')
733 eq(h.encode(), '''\
734wasnipoop; giraffes="very-long-necked-animals";
735\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
736
R David Murray3a6152f2011-03-14 21:13:03 -0400737 def test_header_encode_with_different_output_charset(self):
738 h = Header('文', 'euc-jp')
739 self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=")
740
741 def test_long_header_encode_with_different_output_charset(self):
742 h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4'
743 b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4'
744 b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4'
745 b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp')
746 res = """\
747=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?=
748 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?="""
749 self.assertEqual(h.encode(), res)
750
def test_header_splitter(self):
    # Flatten a MIMEText part carrying one long parameterised header and
    # compare the complete generator output, fold included.
    message = MIMEText('')
    # It'd be great if we could use add_header() here, but that doesn't
    # guarantee an order of the parameters.
    message['X-Foobar-Spoink-Defrobnit'] = (
        'wasnipoop; giraffes="very-long-necked-animals"; '
        'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
    out = StringIO()
    Generator(out).flatten(message)
    expected = (
        'Content-Type: text/plain; charset="us-ascii"\n'
        'MIME-Version: 1.0\n'
        'Content-Transfer-Encoding: 7bit\n'
        'X-Foobar-Spoink-Defrobnit: wasnipoop; '
        'giraffes="very-long-necked-animals";\n'
        ' spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"\n'
        '\n')
    self.ndiffAssertEqual(out.getvalue(), expected)
770
def test_no_semis_header_splitter(self):
    # The References value has no semicolons; the expected output folds
    # it at the space between the fifth and sixth message id.
    message = Message()
    message['From'] = 'test@dom.ain'
    message_ids = ['<%d@dom.ain>' % number for number in range(10)]
    message['References'] = SPACE.join(message_ids)
    message.set_payload('Test')
    out = StringIO()
    Generator(out).flatten(message)
    expected = (
        'From: test@dom.ain\n'
        'References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> '
        '<4@dom.ain>\n'
        ' <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>\n'
        '\n'
        'Test')
    self.ndiffAssertEqual(out.getvalue(), expected)
786
def test_no_split_long_header(self):
    # An unbreakable run of 'x' must come out in one piece, whether or
    # not a "References: " prefix is part of the value.
    check = self.ndiffAssertEqual
    unbroken = ('xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'
                'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
    prefixed = Header('References: ' + 'x' * 80)
    # These come on two lines because Headers are really field value
    # classes and don't really know about their field names.
    check(prefixed.encode(), 'References:\n ' + unbroken)
    bare = Header('x' * 80)
    check(bare.encode(), unbroken)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000798
def test_splitting_multiple_long_lines(self):
    # The same long Received-style line appears three times, the second
    # and third introduced by a tab.  Each copy is expected to be folded
    # after its semicolons while the leading tabs are kept.
    received = (
        'from babylon.socal-raves.org (localhost [127.0.0.1]); '
        'by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; '
        'for <mailman-admin@babylon.socal-raves.org>; '
        'Sat, 2 Feb 2002 17:00:06 -0800 (PST)')
    hstr = '\n\t'.join([received] * 3) + '\n'
    folded = (
        'from babylon.socal-raves.org (localhost [127.0.0.1]);\n'
        ' by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;\n'
        ' for <mailman-admin@babylon.socal-raves.org>;\n'
        ' Sat, 2 Feb 2002 17:00:06 -0800 (PST)')
    expected = '\n\t'.join([folded] * 3)
    header = Header(hstr, continuation_ws='\t')
    self.ndiffAssertEqual(header.encode(), expected)
820
def test_splitting_first_line_only_is_long(self):
    # Only the first physical line of the value is over-long; it is the
    # only one expected to be folded, the tab-led lines stay as they are.
    tail = (
        '\tby kronos.mems-exchange.org with esmtp (Exim 4.05)\n'
        '\tid 17k4h5-00034i-00\n'
        '\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400')
    hstr = (
        'from modemcable093.139-201-24.que.mc.videotron.ca '
        '([24.201.139.93] helo=cthulhu.gerg.ca)\n' + tail)
    expected = (
        'from modemcable093.139-201-24.que.mc.videotron.ca '
        '([24.201.139.93]\n'
        ' helo=cthulhu.gerg.ca)\n' + tail)
    header = Header(hstr, maxlinelen=78, header_name='Received',
                    continuation_ws='\t')
    self.ndiffAssertEqual(header.encode(), expected)
836
def test_long_8bit_header(self):
    # Encode an iso-8859-1 Subject built in two chunks, first through
    # Header.encode() and then through Message.as_string() with and
    # without a line-length limit.
    check = self.ndiffAssertEqual
    subject = Header('Britische Regierung gibt', 'iso-8859-1',
                     header_name='Subject')
    subject.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
    first_word = ('=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht'
                  '_f=FCr_Offs?=\n')
    second_word = ' =?iso-8859-1?q?hore-Windkraftprojekte?='
    check(subject.encode(maxlinelen=76), first_word + second_word)
    message = Message()
    message['Subject'] = subject
    check(message.as_string(maxheaderlen=76),
          'Subject: ' + first_word + second_word + '\n\n')
    # maxheaderlen=0: the whole value is expected in one encoded word.
    check(message.as_string(maxheaderlen=0),
          'Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht'
          '_f=FCr_Offshore-Windkraftprojekte?=\n'
          '\n')
856
def test_long_8bit_header_no_charset(self):
    # A non-ASCII header assigned as a plain string must make as_string()
    # raise; wrapped in a utf-8 Header the same text must fold into two
    # encoded words.
    header_string = ('Britische Regierung gibt gr\xfcnes Licht '
                     'f\xfcr Offshore-Windkraftprojekte '
                     '<a-very-long-address@example.com>')
    undeclared = Message()
    undeclared['Reply-To'] = header_string
    with self.assertRaises(UnicodeEncodeError):
        undeclared.as_string()
    declared = Message()
    declared['Reply-To'] = Header(header_string, 'utf-8',
                                  header_name='Reply-To')
    expected = (
        'Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht'
        '_f=C3=BCr_Offs?=\n'
        ' =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address'
        '=40example=2Ecom=3E?=\n'
        '\n')
    self.ndiffAssertEqual(declared.as_string(maxheaderlen=78), expected)
873
def test_long_to_header(self):
    # A To: header listing several addresses; the expected output puts
    # each fold after a comma that is followed by a space.
    recipients = ('"Someone Test #A" <someone@eecs.umich.edu>,'
                  '<someone@eecs.umich.edu>,'
                  '"Someone Test #B" <someone@umich.edu>, '
                  '"Someone Test #C" <someone@eecs.umich.edu>, '
                  '"Someone Test #D" <someone@eecs.umich.edu>')
    message = Message()
    message['To'] = recipients
    expected = (
        'To: "Someone Test #A" <someone@eecs.umich.edu>,'
        '<someone@eecs.umich.edu>,\n'
        ' "Someone Test #B" <someone@umich.edu>,\n'
        ' "Someone Test #C" <someone@eecs.umich.edu>,\n'
        ' "Someone Test #D" <someone@eecs.umich.edu>\n'
        '\n')
    self.ndiffAssertEqual(message.as_string(maxheaderlen=78), expected)
890
def test_long_line_after_append(self):
    # Appending to a value that is already close to the limit is expected
    # to push the appended text onto a continuation line.
    header = Header('This is an example of string which has almost the '
                    'limit of header length.')
    header.append('Add another line.')
    expected = ('This is an example of string which has almost the limit '
                'of header length.\n'
                ' Add another line.')
    self.ndiffAssertEqual(header.encode(maxlinelen=76), expected)
899
def test_shorter_line_with_append(self):
    # Two short chunks are expected to be joined by a single space on
    # one line.
    header = Header('This is a shorter line.')
    header.append('Add another sentence. (Surprise?)')
    self.ndiffAssertEqual(
        header.encode(),
        'This is a shorter line. Add another sentence. (Surprise?)')
907
def test_long_field_name(self):
    # The long field name is passed as header_name; the expected first
    # encoded word is correspondingly short.
    field_name = 'X-Very-Very-Very-Long-Header-Name'
    german = ('Die Mieter treten hier ein werden mit einem Foerderband '
              'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
              'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
              'bef\xf6rdert. ')
    header = Header(german, 'iso-8859-1', header_name=field_name)
    expected = (
        '=?iso-8859-1?q?Die_Mieter_treten_hier_e?=\n'
        ' =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den'
        '_Korridor_e?=\n'
        ' =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden'
        '_vorbei=2C_ge?=\n'
        ' =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=')
    # BAW: this seems broken because the first line is too long
    self.ndiffAssertEqual(header.encode(maxlinelen=76), expected)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000922
def test_long_received_header(self):
    # The same value is stored once as a Header (tab continuation) and
    # once as a plain string; both are expected to fold identically.
    value = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
             'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
             'Wed, 05 Mar 2003 18:10:18 -0700')
    message = Message()
    message['Received-1'] = Header(value, continuation_ws='\t')
    message['Received-2'] = value
    folded = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
              'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;\n'
              ' Wed, 05 Mar 2003 18:10:18 -0700\n')
    expected = 'Received-1: ' + folded + 'Received-2: ' + folded + '\n'
    # This should be splitting on spaces not semicolons.
    self.ndiffAssertEqual(message.as_string(maxheaderlen=78), expected)
938
def test_string_headerinst_eq(self):
    # A Header instance and the equivalent plain string are expected to
    # be rendered the same way by as_string().
    value = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
             'tu-muenchen.de> (David Bremner\'s message of '
             '"Thu, 6 Mar 2003 13:58:21 +0100")')
    message = Message()
    message['Received-1'] = Header(value, header_name='Received-1',
                                   continuation_ws='\t')
    message['Received-2'] = value
    folded = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
              'tu-muenchen.de> (David Bremner\'s message of "Thu,\n'
              ' 6 Mar 2003 13:58:21 +0100")\n')
    expected = 'Received-1: ' + folded + 'Received-2: ' + folded + '\n'
    # XXX This should be splitting on spaces not commas.
    self.ndiffAssertEqual(message.as_string(maxheaderlen=78), expected)
955
def test_long_unbreakable_lines_with_continuation(self):
    # A two-line value made of unbreakable base64-like text, stored once
    # as a plain string and once as a Header.
    blob = (
        'iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJe'
        'RUIcGBi9\n'
        ' locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69w'
        'j1PN2I5gzp')
    message = Message()
    message['Face-1'] = blob
    message['Face-2'] = Header(blob, header_name='Face-2')
    # XXX This splitting is all wrong.  The first value line should be
    # snug against the field name.
    expected = ('Face-1:\x20\n ' + blob + '\n'
                'Face-2:\x20\n ' + blob + '\n'
                '\n')
    self.ndiffAssertEqual(message.as_string(maxheaderlen=78), expected)
975
def test_another_long_multiline_header(self):
    # Parse a message consisting of one long Received header and check
    # how it is folded when written back out.
    raw = ('Received: from siimage.com '
           '([172.25.1.3]) by zima.siliconimage.com with '
           'Microsoft SMTPSVC(5.0.2195.4905); '
           'Wed, 16 Oct 2002 07:41:11 -0700')
    parsed = email.message_from_string(raw)
    expected = (
        'Received: from siimage.com ([172.25.1.3]) by '
        'zima.siliconimage.com with Microsoft SMTPSVC(5.0.2195.4905);\n'
        ' Wed, 16 Oct 2002 07:41:11 -0700\n'
        '\n')
    self.ndiffAssertEqual(parsed.as_string(maxheaderlen=78), expected)
988
def test_long_lines_with_different_header(self):
    # The value is added twice under the name 'List', once as a string
    # and once as a Header; both copies are expected to fold the same.
    value = ('List-Unsubscribe: '
             '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
             ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
             '?subject=unsubscribe>')
    message = Message()
    message['List'] = value
    message['List'] = Header(value, header_name='List')
    folded = (
        'List: List-Unsubscribe: '
        '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,\n'
        ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
        '?subject=unsubscribe>\n')
    expected = folded + folded + '\n'
    self.ndiffAssertEqual(message.as_string(maxheaderlen=78), expected)
1005
R. David Murray6f0022d2011-01-07 21:57:25 +00001006 def test_long_rfc2047_header_with_embedded_fws(self):
1007 h = Header(textwrap.dedent("""\
1008 We're going to pretend this header is in a non-ascii character set
1009 \tto see if line wrapping with encoded words and embedded
1010 folding white space works"""),
1011 charset='utf-8',
1012 header_name='Test')
1013 self.assertEqual(h.encode()+'\n', textwrap.dedent("""\
1014 =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
1015 =?utf-8?q?cter_set?=
1016 =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
1017 =?utf-8?q?_folding_white_space_works?=""")+'\n')
1018
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001019
Ezio Melottib3aedd42010-11-20 19:04:17 +00001020
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001021# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    # Both tests flatten the same message, whose body starts with a
    # "From " line, and differ only in the mangle_from_ flag.

    def setUp(self):
        message = Message()
        message['From'] = 'aaa@bbb.org'
        message.set_payload('From the desk of A.A.A.:\n'
                            'Blah blah blah\n')
        self.msg = message

    def _flattened(self, mangle_from_):
        # Return self.msg rendered by a Generator with the given flag.
        out = StringIO()
        Generator(out, mangle_from_=mangle_from_).flatten(self.msg)
        return out.getvalue()

    def test_mangled_from(self):
        self.assertEqual(self._flattened(True),
                         'From: aaa@bbb.org\n'
                         '\n'
                         '>From the desk of A.A.A.:\n'
                         'Blah blah blah\n')

    def test_dont_mangle_from(self):
        self.assertEqual(self._flattened(False),
                         'From: aaa@bbb.org\n'
                         '\n'
                         'From the desk of A.A.A.:\n'
                         'Blah blah blah\n')
1052
1053
Ezio Melottib3aedd42010-11-20 19:04:17 +00001054
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001055# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    def setUp(self):
        # Make sure we pick up the audiotest.au that lives in
        # email/test/data.  In Python, there's an audiotest.au living in
        # Lib/test but that isn't included in some binary distros that
        # don't include the test package.  The trailing empty string on
        # the .join() is significant since findfile() will do a dirname().
        data_dir = os.path.join(os.path.dirname(landmark), 'data', '')
        audio_path = findfile('audiotest.au', data_dir)
        with open(audio_path, 'rb') as stream:
            self._audiodata = stream.read()
        self._au = MIMEAudio(self._audiodata)

    def test_guess_minor_type(self):
        self.assertEqual(self._au.get_content_type(), 'audio/basic')

    def test_encoding(self):
        # Decoding the base64 payload must give back the original bytes.
        encoded = self._au.get_payload().encode('ascii')
        self.assertEqual(base64.decodebytes(encoded), self._audiodata)

    def test_checkSetMinor(self):
        fish = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(fish.get_content_type(), 'audio/fish')

    def test_add_header(self):
        audio = self._au
        disposition = 'content-disposition'
        audio.add_header('Content-Disposition', 'attachment',
                         filename='audiotest.au')
        self.assertEqual(audio[disposition],
                         'attachment; filename="audiotest.au"')
        self.assertEqual(audio.get_params(header=disposition),
                         [('attachment', ''), ('filename', 'audiotest.au')])
        self.assertEqual(audio.get_param('filename', header=disposition),
                         'audiotest.au')
        self.assertEqual(audio.get_param('attachment', header=disposition),
                         '')
        # A fresh list serves as a failobj that cannot be mistaken for a
        # real parameter value.
        sentinel = []
        self.assertIs(audio.get_param('foo', failobj=sentinel,
                                      header=disposition),
                      sentinel)
        # Try some missing stuff
        self.assertIs(audio.get_param('foobar', sentinel), sentinel)
        self.assertIs(audio.get_param('attachment', sentinel,
                                      header='foobar'),
                      sentinel)
1099
1100
Ezio Melottib3aedd42010-11-20 19:04:17 +00001101
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001102# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    def setUp(self):
        # Load the GIF fixture once per test and wrap it in a MIMEImage.
        with openfile('PyBanner048.gif', 'rb') as stream:
            self._imgdata = stream.read()
        self._im = MIMEImage(self._imgdata)

    def test_guess_minor_type(self):
        self.assertEqual(self._im.get_content_type(), 'image/gif')

    def test_encoding(self):
        # Decoding the base64 payload must give back the original bytes.
        encoded = self._im.get_payload().encode('ascii')
        self.assertEqual(base64.decodebytes(encoded), self._imgdata)

    def test_checkSetMinor(self):
        fish = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(fish.get_content_type(), 'image/fish')

    def test_add_header(self):
        image = self._im
        disposition = 'content-disposition'
        image.add_header('Content-Disposition', 'attachment',
                         filename='dingusfish.gif')
        self.assertEqual(image[disposition],
                         'attachment; filename="dingusfish.gif"')
        self.assertEqual(image.get_params(header=disposition),
                         [('attachment', ''), ('filename', 'dingusfish.gif')])
        self.assertEqual(image.get_param('filename', header=disposition),
                         'dingusfish.gif')
        self.assertEqual(image.get_param('attachment', header=disposition),
                         '')
        # A fresh list serves as a failobj that cannot be mistaken for a
        # real parameter value.
        sentinel = []
        self.assertIs(image.get_param('foo', failobj=sentinel,
                                      header=disposition),
                      sentinel)
        # Try some missing stuff
        self.assertIs(image.get_param('foobar', sentinel), sentinel)
        self.assertIs(image.get_param('attachment', sentinel,
                                      header='foobar'),
                      sentinel)
1140
1141
Ezio Melottib3aedd42010-11-20 19:04:17 +00001142
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001143# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    def test_headers(self):
        # With no explicit subtype or encoder the part is expected to be
        # application/octet-stream with a base64 transfer encoding.
        part = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        self.assertEqual(part.get_content_type(),
                         'application/octet-stream')
        self.assertEqual(part['content-transfer-encoding'], 'base64')

    def test_body(self):
        raw = b'\xfa\xfb\xfc\xfd\xfe\xff'
        part = MIMEApplication(raw)
        # whitespace in the cte encoded block is RFC-irrelevant.
        encoded = part.get_payload().strip()
        self.assertEqual(encoded, '+vv8/f7/')
        self.assertEqual(part.get_payload(decode=True), raw)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001158
1159
Ezio Melottib3aedd42010-11-20 19:04:17 +00001160
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001161# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    def setUp(self):
        # Default construction: no explicit charset.
        self._msg = MIMEText('hello there')

    def test_types(self):
        eq = self.assertEqual
        eq(self._msg.get_content_type(), 'text/plain')
        eq(self._msg.get_param('charset'), 'us-ascii')
        # A fresh list serves as a failobj that cannot be mistaken for a
        # real parameter value.
        missing = []
        self.assertIs(self._msg.get_param('foobar', missing), missing)
        self.assertIs(
            self._msg.get_param('charset', missing, header='foobar'),
            missing)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # The two checks above repeat test_charset; these two cover what
        # is specific to 7-bit input: it is labelled 7bit and the payload
        # is stored unmodified.
        eq(msg['content-transfer-encoding'], '7bit')
        eq(msg.get_payload(), 'hello there')

    def test_7bit_input_no_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there')
        eq(msg.get_charset(), 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        self.assertIn('hello there', msg.as_string())

    def test_utf8_input(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        eq = self.assertEqual
        msg = MIMEText(teststr, _charset='utf-8')
        eq(msg.get_charset().output_charset, 'utf-8')
        eq(msg['content-type'], 'text/plain; charset="utf-8"')
        eq(msg.get_payload(decode=True), teststr.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
                   "will fix in email6")
    def test_utf8_input_no_charset(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
1212
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001213
Ezio Melottib3aedd42010-11-20 19:04:17 +00001214
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001215# Test complicated multipart/* messages
class TestMultipart(TestEmailBase):
    # Header block shared by every "outer" multipart flattened below.
    _OUTER_HEADERS = (
        'Content-Type: multipart/mixed; boundary="BOUNDARY"\n'
        'MIME-Version: 1.0\n'
        'Subject: A subject\n'
        'To: aperson@dom.ain\n'
        'From: bperson@dom.ain\n')
    # The single 'hello world' text part, opening boundary through closing
    # boundary, with no trailing newline.
    _TEXT_PART = (
        '--BOUNDARY\n'
        'Content-Type: text/plain; charset="us-ascii"\n'
        'MIME-Version: 1.0\n'
        'Content-Transfer-Encoding: 7bit\n'
        '\n'
        'hello world\n'
        '--BOUNDARY--')

    def setUp(self):
        # Build a multipart/mixed container holding a text intro and a
        # GIF attachment; the pieces are kept on self for test_hierarchy.
        with openfile('PyBanner048.gif', 'rb') as fp:
            data = fp.read()
        container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
        image = MIMEImage(data, name='dingusfish.gif')
        image.add_header('content-disposition', 'attachment',
                         filename='dingusfish.gif')
        intro = MIMEText('Hi there,\n'
                         '\n'
                         'This is the dingus fish.\n')
        container.attach(intro)
        container.attach(image)
        container['From'] = 'Barry <barry@digicool.com>'
        container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
        container['Subject'] = 'Here is your dingus fish'

        now = 987809702.54848599
        timetuple = time.localtime(now)
        # The last field is the DST flag: 0 selects the standard offset,
        # anything else the alternate (DST) one.
        tzsecs = time.timezone if timetuple[-1] == 0 else time.altzone
        # time.timezone/altzone count seconds *west* of UTC, so a positive
        # value means a negative offset in the Date header.
        sign = '-' if tzsecs > 0 else '+'
        # Format the magnitude as HHMM.  Using the absolute value avoids a
        # doubled sign east of UTC, and divmod keeps offsets that are not
        # whole hours correct.
        hours, minutes = divmod(abs(tzsecs) // 60, 60)
        tzoffset = ' %s%02d%02d' % (sign, hours, minutes)
        container['Date'] = time.strftime(
            '%a, %d %b %Y %H:%M:%S', timetuple) + tzoffset
        self._msg = container
        self._im = image
        self._txt = intro

    def _new_outer(self):
        # Return an empty multipart/mixed carrying the three headers that
        # _OUTER_HEADERS expects, in that order.
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        return outer

    def test_hierarchy(self):
        eq = self.assertEqual
        m = self._msg
        self.assertTrue(m.is_multipart())
        eq(m.get_content_type(), 'multipart/mixed')
        eq(len(m.get_payload()), 2)
        self.assertRaises(IndexError, m.get_payload, 2)
        m0 = m.get_payload(0)
        m1 = m.get_payload(1)
        self.assertIs(m0, self._txt)
        self.assertIs(m1, self._im)
        eq(m.get_payload(), [m0, m1])
        self.assertFalse(m0.is_multipart())
        self.assertFalse(m1.is_multipart())

    def test_empty_multipart_idempotent(self):
        # Parsing and re-flattening must give back the identical text.
        text = (self._OUTER_HEADERS +
                '\n'
                '\n'
                '--BOUNDARY\n'
                '\n'
                '\n'
                '--BOUNDARY--\n')
        msg = Parser().parsestr(text)
        self.ndiffAssertEqual(text, msg.as_string())

    def test_no_parts_in_a_multipart_with_none_epilogue(self):
        outer = self._new_outer()
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(),
                              self._OUTER_HEADERS +
                              '\n'
                              '--BOUNDARY\n'
                              '\n'
                              '--BOUNDARY--')

    def test_no_parts_in_a_multipart_with_empty_epilogue(self):
        outer = self._new_outer()
        outer.preamble = ''
        outer.epilogue = ''
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(),
                              self._OUTER_HEADERS +
                              '\n'
                              '\n'
                              '--BOUNDARY\n'
                              '\n'
                              '--BOUNDARY--\n')

    def test_one_part_in_a_multipart(self):
        outer = self._new_outer()
        outer.set_boundary('BOUNDARY')
        outer.attach(MIMEText('hello world'))
        self.ndiffAssertEqual(outer.as_string(),
                              self._OUTER_HEADERS + '\n' + self._TEXT_PART)

    def test_seq_parts_in_a_multipart_with_empty_preamble(self):
        # An empty-string preamble is expected to add one extra blank line
        # before the first boundary.
        outer = self._new_outer()
        outer.preamble = ''
        outer.attach(MIMEText('hello world'))
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(),
                              self._OUTER_HEADERS + '\n\n' + self._TEXT_PART)

    def test_seq_parts_in_a_multipart_with_none_preamble(self):
        outer = self._new_outer()
        outer.preamble = None
        outer.attach(MIMEText('hello world'))
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(),
                              self._OUTER_HEADERS + '\n' + self._TEXT_PART)

    def test_seq_parts_in_a_multipart_with_none_epilogue(self):
        outer = self._new_outer()
        outer.epilogue = None
        outer.attach(MIMEText('hello world'))
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(),
                              self._OUTER_HEADERS + '\n' + self._TEXT_PART)

    def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
        # An empty-string epilogue is expected to add a newline after the
        # closing boundary.
        outer = self._new_outer()
        outer.epilogue = ''
        outer.attach(MIMEText('hello world'))
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(
            outer.as_string(),
            self._OUTER_HEADERS + '\n' + self._TEXT_PART + '\n')

    def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
        outer = self._new_outer()
        outer.epilogue = '\n'
        outer.attach(MIMEText('hello world'))
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(
            outer.as_string(),
            self._OUTER_HEADERS + '\n' + self._TEXT_PART + '\n\n')

    def test_message_external_body(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_36.txt')
        eq(len(msg.get_payload()), 2)
        msg1 = msg.get_payload(1)
        eq(msg1.get_content_type(), 'multipart/alternative')
        eq(len(msg1.get_payload()), 2)
        for subpart in msg1.get_payload():
            eq(subpart.get_content_type(), 'message/external-body')
            eq(len(subpart.get_payload()), 1)
            subsubpart = subpart.get_payload(0)
            eq(subsubpart.get_content_type(), 'text/plain')

    def test_double_boundary(self):
        # msg_37.txt is a multipart that contains two dash-boundary's in a
        # row.  Our interpretation of RFC 2046 calls for ignoring the second
        # and subsequent boundaries.
        msg = self._msgobj('msg_37.txt')
        self.assertEqual(len(msg.get_payload()), 3)

    def test_nested_inner_contains_outer_boundary(self):
        # msg_38.txt has an inner part that contains outer boundaries.  My
        # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
        # these are illegal and should be interpreted as unterminated inner
        # parts.
        # NOTE(review): the nesting indentation of the expected outline is
        # not recoverable from this copy of the file; four spaces per level
        # and the depths below are assumed -- confirm against the output of
        # iterators._structure() for msg_38.txt.
        msg = self._msgobj('msg_38.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        self.ndiffAssertEqual(sfp.getvalue(),
                              'multipart/mixed\n'
                              '    multipart/mixed\n'
                              '        multipart/alternative\n'
                              '            text/plain\n'
                              '        text/plain\n'
                              '    text/plain\n'
                              '    text/plain\n')

    def test_nested_with_same_boundary(self):
        # msg 39.txt is similarly evil in that it's got inner parts that use
        # the same boundary as outer parts.  Again, I believe the way this is
        # parsed is closest to the spirit of RFC 2046
        # NOTE(review): the nesting indentation of the expected outline is
        # not recoverable from this copy of the file; four spaces per level
        # and the depths below are assumed -- confirm against the output of
        # iterators._structure() for msg_39.txt.
        msg = self._msgobj('msg_39.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        self.ndiffAssertEqual(sfp.getvalue(),
                              'multipart/mixed\n'
                              '    multipart/mixed\n'
                              '        multipart/alternative\n'
                              '        application/octet-stream\n'
                              '        application/octet-stream\n'
                              '    text/plain\n')

    def test_boundary_in_non_multipart(self):
        # A boundary parameter on a non-multipart type: the body is
        # expected to be written back unchanged.
        msg = self._msgobj('msg_40.txt')
        self.assertEqual(
            msg.as_string(),
            'MIME-Version: 1.0\n'
            'Content-Type: text/html; boundary="--961284236552522269"\n'
            '\n'
            '----961284236552522269\n'
            'Content-Type: text/html;\n'
            'Content-Transfer-Encoding: 7Bit\n'
            '\n'
            '<html></html>\n'
            '\n'
            '----961284236552522269--\n')

    def test_boundary_with_leading_space(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'MIME-Version: 1.0\n'
            'Content-Type: multipart/mixed; boundary=" XXXX"\n'
            '\n'
            '-- XXXX\n'
            'Content-Type: text/plain\n'
            '\n'
            '\n'
            '-- XXXX\n'
            'Content-Type: text/plain\n'
            '\n'
            '-- XXXX--\n')
        self.assertTrue(msg.is_multipart())
        eq(msg.get_boundary(), ' XXXX')
        eq(len(msg.get_payload()), 2)

    def test_boundary_without_trailing_newline(self):
        # The closing boundary is the very last thing in the text; the
        # part's payload must still come out without a trailing newline.
        boundary = '===============0012394164=='
        m = Parser().parsestr(
            'Content-Type: multipart/mixed; boundary="%s"\n'
            'MIME-Version: 1.0\n'
            '\n'
            '--%s\n'
            'Content-Type: image/file1.jpg\n'
            'MIME-Version: 1.0\n'
            'Content-Transfer-Encoding: base64\n'
            '\n'
            'YXNkZg==\n'
            '--%s--' % (boundary, boundary, boundary))
        self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001587
1588
Ezio Melottib3aedd42010-11-20 19:04:17 +00001589
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001590# Test some badly formatted messages
class TestNonConformant(TestEmailBase):
    # Each test feeds the parser (or the Message API) input that violates
    # the RFCs in some way and checks how it is interpreted and, where
    # applicable, which defects get recorded on the message.

    def _assert_defects(self, msg, *defect_classes):
        # msg.defects must hold exactly one instance of each class in
        # defect_classes, in the given order.  assertIsInstance is used so
        # that a failure reports the object that was actually found.
        self.assertEqual(len(msg.defects), len(defect_classes))
        for defect, defect_class in zip(msg.defects, defect_classes):
            self.assertIsInstance(defect, defect_class)

    def test_parse_missing_minor_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_14.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')

    def test_same_boundary_inner_outer(self):
        msg = self._msgobj('msg_15.txt')
        # XXX We can probably eventually do better
        inner = msg.get_payload(0)
        self.assertTrue(hasattr(inner, 'defects'))
        self._assert_defects(inner, errors.StartBoundaryNotFoundDefect)

    def test_multipart_no_boundary(self):
        msg = self._msgobj('msg_25.txt')
        # Without a usable boundary the body is kept as one string.
        self.assertIsInstance(msg.get_payload(), str)
        self._assert_defects(msg,
                             errors.NoBoundaryInMultipartDefect,
                             errors.MultipartInvariantViolationDefect)

    def test_invalid_content_type(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        msg = Message()
        # RFC 2045, $5.2 says invalid yields text/plain
        msg['Content-Type'] = 'text'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Clear the old value and try something /really/ invalid
        del msg['content-type']
        msg['Content-Type'] = 'foo'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Still, make sure that the message is idempotently generated
        s = StringIO()
        g = Generator(s)
        g.flatten(msg)
        neq(s.getvalue(), 'Content-Type: foo\n\n')

    def test_no_start_boundary(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_31.txt')
        # The whole would-be multipart body is returned as a single string.
        eq(msg.get_payload(), """\
--BOUNDARY
Content-Type: text/plain

message 1

--BOUNDARY
Content-Type: text/plain

message 2

--BOUNDARY--
""")

    def test_no_separating_blank_line(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_35.txt')
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: here's something interesting

counter to RFC 2822, there's no separating newline here
""")

    def test_lying_multipart(self):
        msg = self._msgobj('msg_41.txt')
        self.assertTrue(hasattr(msg, 'defects'))
        self._assert_defects(msg,
                             errors.NoBoundaryInMultipartDefect,
                             errors.MultipartInvariantViolationDefect)

    def test_missing_start_boundary(self):
        outer = self._msgobj('msg_42.txt')
        # The message structure is:
        #
        # multipart/mixed
        #    text/plain
        #    message/rfc822
        #        multipart/mixed [*]
        #
        # [*] This message is missing its start boundary
        bad = outer.get_payload(1).get_payload(0)
        self._assert_defects(bad, errors.StartBoundaryNotFoundDefect)

    def test_first_line_is_continuation_header(self):
        eq = self.assertEqual
        m = ' Line 1\nLine 2\nLine 3'
        msg = email.message_from_string(m)
        # The leading continuation line yields no header; it is recorded
        # on the defect instead.
        eq(msg.keys(), [])
        eq(msg.get_payload(), 'Line 2\nLine 3')
        self._assert_defects(msg, errors.FirstHeaderLineIsContinuationDefect)
        eq(msg.defects[0].line, ' Line 1\n')
1701
1702
Ezio Melottib3aedd42010-11-20 19:04:17 +00001703
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001704# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    # decode_header()/make_header() behaviour on RFC 2047 encoded words.

    def test_rfc2047_multiline(self):
        raw = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
        decoded = decode_header(raw)
        self.assertEqual(decoded, [
            (b'Re:', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            (b'baz foo bar', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
        header = make_header(decoded)
        self.assertEqual(
            str(header),
            'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        # Re-encoding with a 76 column limit splits the last encoded word.
        self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""")

    def test_whitespace_eater_unicode(self):
        raw = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        decoded = decode_header(raw)
        self.assertEqual(decoded, [(b'Andr\xe9', 'iso-8859-1'),
                                   (b'Pirard <pirard@dom.ain>', None)])
        as_text = str(make_header(decoded))
        self.assertEqual(as_text, 'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_eater_unicode_2(self):
        raw = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?='
        decoded = decode_header(raw)
        self.assertEqual(decoded, [
            (b'The', None),
            (b'quick brown fox', 'iso-8859-1'),
            (b'jumped over the', None),
            (b'lazy dog', 'iso-8859-1')])
        as_text = str(make_header(decoded))
        self.assertEqual(as_text,
                         'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        # Encoded words not set off by whitespace are expected to be left
        # undecoded: the input comes back as one unencoded chunk.
        raw = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        self.assertEqual(decode_header(raw), [(raw, None)])

    def test_rfc2047_with_whitespace(self):
        raw = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        expected = [
            (b'Sm', None),
            (b'\xf6', 'iso-8859-1'),
            (b'rg', None),
            (b'\xe5', 'iso-8859-1'),
            (b'sbord', None),
        ]
        self.assertEqual(decode_header(raw), expected)

    def test_rfc2047_B_bad_padding(self):
        template = '=?iso-8859-1?B?%s?='
        # (base64 text with correct, short or missing padding, expected
        # bytes); only inputs that decode to complete bytes are tested.
        cases = [
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi')
            ]
        for encoded, expected_bytes in cases:
            decoded = decode_header(template % encoded)
            self.assertEqual(decoded, [(expected_bytes, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004.
        raw = '=?iso-8659-1?Q?andr=e9=zz?='
        expected = [(b'andr\xe9=zz', 'iso-8659-1')]
        self.assertEqual(decode_header(raw), expected)
1768
Ezio Melottib3aedd42010-11-20 19:04:17 +00001769
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001770# Test the MIMEMessage class
1771class TestMIMEMessage(TestEmailBase):
def setUp(self):
    # Keep the raw text of msg_11.txt on the instance as self._text.
    with openfile('msg_11.txt') as source:
        self._text = source.read()
1775
1776 def test_type_error(self):
1777 self.assertRaises(TypeError, MIMEMessage, 'a plain string')
1778
1779 def test_valid_argument(self):
1780 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001781 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001782 subject = 'A sub-message'
1783 m = Message()
1784 m['Subject'] = subject
1785 r = MIMEMessage(m)
1786 eq(r.get_content_type(), 'message/rfc822')
1787 payload = r.get_payload()
1788 unless(isinstance(payload, list))
1789 eq(len(payload), 1)
1790 subpart = payload[0]
1791 unless(subpart is m)
1792 eq(subpart['subject'], subject)
1793
1794 def test_bad_multipart(self):
1795 eq = self.assertEqual
1796 msg1 = Message()
1797 msg1['Subject'] = 'subpart 1'
1798 msg2 = Message()
1799 msg2['Subject'] = 'subpart 2'
1800 r = MIMEMessage(msg1)
1801 self.assertRaises(errors.MultipartConversionError, r.attach, msg2)
1802
1803 def test_generate(self):
1804 # First craft the message to be encapsulated
1805 m = Message()
1806 m['Subject'] = 'An enclosed message'
1807 m.set_payload('Here is the body of the message.\n')
1808 r = MIMEMessage(m)
1809 r['Subject'] = 'The enclosing message'
1810 s = StringIO()
1811 g = Generator(s)
1812 g.flatten(r)
1813 self.assertEqual(s.getvalue(), """\
1814Content-Type: message/rfc822
1815MIME-Version: 1.0
1816Subject: The enclosing message
1817
1818Subject: An enclosed message
1819
1820Here is the body of the message.
1821""")
1822
def test_parse_message_rfc822(self):
    # msg_11.txt must parse as message/rfc822 whose payload is a list
    # holding exactly one Message: the enclosed message.
    eq = self.assertEqual
    msg = self._msgobj('msg_11.txt')
    eq(msg.get_content_type(), 'message/rfc822')
    payload = msg.get_payload()
    # assertIsInstance reports the actual object when the check fails.
    self.assertIsInstance(payload, list)
    eq(len(payload), 1)
    submsg = payload[0]
    self.assertIsInstance(submsg, Message)
    eq(submsg['subject'], 'An enclosed message')
    eq(submsg.get_payload(), 'Here is the body of the message.\n')
1835
1836 def test_dsn(self):
1837 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001838 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001839 # msg 16 is a Delivery Status Notification, see RFC 1894
1840 msg = self._msgobj('msg_16.txt')
1841 eq(msg.get_content_type(), 'multipart/report')
1842 unless(msg.is_multipart())
1843 eq(len(msg.get_payload()), 3)
1844 # Subpart 1 is a text/plain, human readable section
1845 subpart = msg.get_payload(0)
1846 eq(subpart.get_content_type(), 'text/plain')
1847 eq(subpart.get_payload(), """\
1848This report relates to a message you sent with the following header fields:
1849
1850 Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
1851 Date: Sun, 23 Sep 2001 20:10:55 -0700
1852 From: "Ian T. Henry" <henryi@oxy.edu>
1853 To: SoCal Raves <scr@socal-raves.org>
1854 Subject: [scr] yeah for Ians!!
1855
1856Your message cannot be delivered to the following recipients:
1857
1858 Recipient address: jangel1@cougar.noc.ucla.edu
1859 Reason: recipient reached disk quota
1860
1861""")
1862 # Subpart 2 contains the machine parsable DSN information. It
1863 # consists of two blocks of headers, represented by two nested Message
1864 # objects.
1865 subpart = msg.get_payload(1)
1866 eq(subpart.get_content_type(), 'message/delivery-status')
1867 eq(len(subpart.get_payload()), 2)
1868 # message/delivery-status should treat each block as a bunch of
1869 # headers, i.e. a bunch of Message objects.
1870 dsn1 = subpart.get_payload(0)
1871 unless(isinstance(dsn1, Message))
1872 eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
1873 eq(dsn1.get_param('dns', header='reporting-mta'), '')
1874 # Try a missing one <wink>
1875 eq(dsn1.get_param('nsd', header='reporting-mta'), None)
1876 dsn2 = subpart.get_payload(1)
1877 unless(isinstance(dsn2, Message))
1878 eq(dsn2['action'], 'failed')
1879 eq(dsn2.get_params(header='original-recipient'),
1880 [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
1881 eq(dsn2.get_param('rfc822', header='final-recipient'), '')
1882 # Subpart 3 is the original message
1883 subpart = msg.get_payload(2)
1884 eq(subpart.get_content_type(), 'message/rfc822')
1885 payload = subpart.get_payload()
1886 unless(isinstance(payload, list))
1887 eq(len(payload), 1)
1888 subsubpart = payload[0]
1889 unless(isinstance(subsubpart, Message))
1890 eq(subsubpart.get_content_type(), 'text/plain')
1891 eq(subsubpart['message-id'],
1892 '<002001c144a6$8752e060$56104586@oxy.edu>')
1893
def test_epilogue(self):
    # Build a two-part multipart/mixed message by hand, with a preamble
    # and an epilogue, and require its flattened form to equal the
    # contents of msg_21.txt.
    with openfile('msg_21.txt') as fp:
        expected = fp.read()
    msg = Message()
    for name, value in (('From', 'aperson@dom.ain'),
                        ('To', 'bperson@dom.ain'),
                        ('Subject', 'Test')):
        msg[name] = value
    msg.preamble = 'MIME message'
    msg.epilogue = 'End of MIME message\n'
    first = MIMEText('One')
    second = MIMEText('Two')
    msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
    msg.attach(first)
    msg.attach(second)
    out = StringIO()
    Generator(out).flatten(msg)
    self.ndiffAssertEqual(out.getvalue(), expected)
1913
def test_no_nl_preamble(self):
    # The preamble has no trailing newline and the epilogue is empty;
    # as_string() must still put the first boundary on a line of its own.
    expected = """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
"""
    msg = Message()
    for name, value in (('From', 'aperson@dom.ain'),
                        ('To', 'bperson@dom.ain'),
                        ('Subject', 'Test')):
        msg[name] = value
    msg.preamble = 'MIME message'
    msg.epilogue = ''
    first = MIMEText('One')
    second = MIMEText('Two')
    msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
    msg.attach(first)
    msg.attach(second)
    self.ndiffAssertEqual(msg.as_string(), expected)
1948
def test_default_type(self):
    # In msg_30.txt both top-level parts must report message/rfc822 as
    # default and content type, and the message inside each must report
    # text/plain for both.
    eq = self.assertEqual
    with openfile('msg_30.txt') as fp:
        msg = email.message_from_file(fp)
    containers = []
    for index in range(2):
        container = msg.get_payload(index)
        eq(container.get_default_type(), 'message/rfc822')
        eq(container.get_content_type(), 'message/rfc822')
        containers.append(container)
    for container in containers:
        enclosed = container.get_payload(0)
        eq(enclosed.get_default_type(), 'text/plain')
        eq(enclosed.get_content_type(), 'text/plain')
1965
def test_default_type_with_explicit_container_type(self):
    # Same expectations as test_default_type, checked against msg_28.txt:
    # message/rfc822 for the two top-level parts, text/plain for the
    # message enclosed in each.
    eq = self.assertEqual
    with openfile('msg_28.txt') as fp:
        msg = email.message_from_file(fp)
    containers = []
    for index in range(2):
        container = msg.get_payload(index)
        eq(container.get_default_type(), 'message/rfc822')
        eq(container.get_content_type(), 'message/rfc822')
        containers.append(container)
    for container in containers:
        enclosed = container.get_payload(0)
        eq(enclosed.get_default_type(), 'text/plain')
        eq(enclosed.get_content_type(), 'text/plain')
1982
def test_default_type_non_parsed(self):
    # Build a multipart/digest by hand and check the types reported by
    # its message/rfc822 subparts, first with their own Content-Type and
    # MIME-Version headers and then after those headers are deleted.
    eq = self.assertEqual
    neq = self.ndiffAssertEqual
    with_headers = '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
'''
    without_headers = '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
'''
    # Set up container
    container = MIMEMultipart('digest', 'BOUNDARY')
    container.epilogue = ''
    # Set up subparts
    inner1 = MIMEText('message 1\n')
    inner2 = MIMEText('message 2\n')
    subparts = (MIMEMessage(inner1), MIMEMessage(inner2))
    for subpart in subparts:
        container.attach(subpart)
    for subpart in subparts:
        eq(subpart.get_content_type(), 'message/rfc822')
        eq(subpart.get_default_type(), 'message/rfc822')
    neq(container.as_string(0), with_headers)
    # Drop the subparts' own type headers; inside a digest they must
    # still report message/rfc822.
    for subpart in subparts:
        for header in ('content-type', 'mime-version'):
            del subpart[header]
    for subpart in subparts:
        eq(subpart.get_content_type(), 'message/rfc822')
        eq(subpart.get_default_type(), 'message/rfc822')
    neq(container.as_string(0), without_headers)
2056
2057 def test_mime_attachments_in_constructor(self):
2058 eq = self.assertEqual
2059 text1 = MIMEText('')
2060 text2 = MIMEText('')
2061 msg = MIMEMultipart(_subparts=(text1, text2))
2062 eq(len(msg.get_payload()), 2)
2063 eq(msg.get_payload(0), text1)
2064 eq(msg.get_payload(1), text2)
2065
Christian Heimes587c2bf2008-01-19 16:21:02 +00002066 def test_default_multipart_constructor(self):
2067 msg = MIMEMultipart()
2068 self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002069
Ezio Melottib3aedd42010-11-20 19:04:17 +00002070
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002071# A general test of parser->model->generator idempotency. IOW, read a message
2072# in, parse it into a message object tree, then without touching the tree,
2073# regenerate the plain text. The original text and the transformed text
2074# should be identical. Note: that we ignore the Unix-From since that may
2075# contain a changed date.
class TestIdempotent(TestEmailBase):
    # Parse a data file, regenerate its text with a Generator and require
    # the result to equal the original text.

    # Line separator that the expected payload/preamble strings end with.
    linesep = '\n'

    def _msgobj(self, filename):
        # Return (parsed message, original text) for the named data file.
        with openfile(filename) as fp:
            data = fp.read()
        msg = email.message_from_string(data)
        return msg, data

    def _idempotent(self, msg, text, unixfrom=False):
        # Flatten msg with header wrapping disabled (maxheaderlen=0) and
        # require the generated text to equal `text`.
        eq = self.ndiffAssertEqual
        s = StringIO()
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg, unixfrom=unixfrom)
        eq(text, s.getvalue())

    def test_parse_text_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_01.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_params()[1], ('charset', 'us-ascii'))
        eq(msg.get_param('charset'), 'us-ascii')
        eq(msg.preamble, None)
        eq(msg.epilogue, None)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_03.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_params(), None)
        eq(msg.get_param('charset'), None)
        self._idempotent(msg, text)

    def test_simple_multipart(self):
        self._idempotent(*self._msgobj('msg_04.txt'))

    def test_MIME_digest(self):
        self._idempotent(*self._msgobj('msg_02.txt'))

    def test_long_header(self):
        self._idempotent(*self._msgobj('msg_27.txt'))

    def test_MIME_digest_with_part_headers(self):
        self._idempotent(*self._msgobj('msg_28.txt'))

    def test_mixed_with_image(self):
        self._idempotent(*self._msgobj('msg_06.txt'))

    def test_multipart_report(self):
        self._idempotent(*self._msgobj('msg_05.txt'))

    def test_dsn(self):
        self._idempotent(*self._msgobj('msg_16.txt'))

    def test_preamble_epilogue(self):
        self._idempotent(*self._msgobj('msg_21.txt'))

    def test_multipart_one_part(self):
        self._idempotent(*self._msgobj('msg_23.txt'))

    def test_multipart_no_parts(self):
        self._idempotent(*self._msgobj('msg_24.txt'))

    def test_no_start_boundary(self):
        self._idempotent(*self._msgobj('msg_31.txt'))

    def test_rfc2231_charset(self):
        self._idempotent(*self._msgobj('msg_32.txt'))

    def test_more_rfc2231_parameters(self):
        self._idempotent(*self._msgobj('msg_33.txt'))

    def test_text_plain_in_a_multipart_digest(self):
        self._idempotent(*self._msgobj('msg_34.txt'))

    def test_nested_multipart_mixeds(self):
        self._idempotent(*self._msgobj('msg_12a.txt'))

    def test_message_external_body_idempotent(self):
        self._idempotent(*self._msgobj('msg_36.txt'))

    def test_message_delivery_status(self):
        # The only round trip that also emits the Unix From line.
        self._idempotent(*self._msgobj('msg_43.txt'), unixfrom=True)

    def test_message_signed_idempotent(self):
        self._idempotent(*self._msgobj('msg_45.txt'))

    def test_content_type(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_05.txt')
        eq(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(msg.get_params())
        eq(params['report-type'], 'delivery-status')
        eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep)
        eq(msg.epilogue, self.linesep)
        eq(len(msg.get_payload()), 3)
        # Make sure the subparts are what we expect
        msg1 = msg.get_payload(0)
        eq(msg1.get_content_type(), 'text/plain')
        eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg2 = msg.get_payload(1)
        eq(msg2.get_content_type(), 'text/plain')
        eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg3 = msg.get_payload(2)
        eq(msg3.get_content_type(), 'message/rfc822')
        # assertIsInstance reports the actual object when a check fails.
        self.assertIsInstance(msg3, Message)
        payload = msg3.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg4 = payload[0]
        self.assertIsInstance(msg4, Message)
        eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep)

    def test_parser(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        eq(msg.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and that
        # that submessage has a type of text/plain
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg1 = payload[0]
        self.assertIsInstance(msg1, Message)
        eq(msg1.get_content_type(), 'text/plain')
        self.assertIsInstance(msg1.get_payload(), str)
        eq(msg1.get_payload(), self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002233
2234
Ezio Melottib3aedd42010-11-20 19:04:17 +00002235
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002236# Test various other bits of the package's functionality
2237class TestMiscellaneous(TestEmailBase):
def test_message_from_string(self):
    # Parse the text of msg_01.txt and regenerate it; the output must
    # equal the input.
    with openfile('msg_01.txt') as fp:
        original = fp.read()
    msg = email.message_from_string(original)
    out = StringIO()
    # Don't wrap/continue long headers since we're trying to test
    # idempotency.
    Generator(out, maxheaderlen=0).flatten(msg)
    self.assertEqual(original, out.getvalue())
2248
def test_message_from_file(self):
    # Read msg_01.txt once as text, rewind, then parse from the same file
    # object; regenerating the message must give back that text.
    with openfile('msg_01.txt') as fp:
        original = fp.read()
        fp.seek(0)
        msg = email.message_from_file(fp)
    out = StringIO()
    # Don't wrap/continue long headers since we're trying to test
    # idempotency.
    Generator(out, maxheaderlen=0).flatten(msg)
    self.assertEqual(original, out.getvalue())
2260
def test_message_from_string_with_class(self):
    # message_from_string() must build the root message, and every
    # subpart, from the Message subclass passed as its second argument.
    with openfile('msg_01.txt') as fp:
        text = fp.read()

    # Create a subclass
    class MyMessage(Message):
        pass

    msg = email.message_from_string(text, MyMessage)
    # assertIsInstance reports the actual object when the check fails.
    self.assertIsInstance(msg, MyMessage)
    # Try something more complicated
    with openfile('msg_02.txt') as fp:
        text = fp.read()
    msg = email.message_from_string(text, MyMessage)
    for subpart in msg.walk():
        self.assertIsInstance(subpart, MyMessage)
2278
def test_message_from_file_with_class(self):
    # message_from_file() must build the root message, and every subpart,
    # from the Message subclass passed as its second argument.

    # Create a subclass
    class MyMessage(Message):
        pass

    with openfile('msg_01.txt') as fp:
        msg = email.message_from_file(fp, MyMessage)
    # assertIsInstance reports the actual object when the check fails.
    self.assertIsInstance(msg, MyMessage)
    # Try something more complicated
    with openfile('msg_02.txt') as fp:
        msg = email.message_from_file(fp, MyMessage)
    for subpart in msg.walk():
        self.assertIsInstance(subpart, MyMessage)
2293
2294 def test__all__(self):
2295 module = __import__('email')
2296 # Can't use sorted() here due to Python 2.3 compatibility
2297 all = module.__all__[:]
2298 all.sort()
2299 self.assertEqual(all, [
2300 'base64mime', 'charset', 'encoders', 'errors', 'generator',
R. David Murray96fd54e2010-10-08 15:55:28 +00002301 'header', 'iterators', 'message', 'message_from_binary_file',
2302 'message_from_bytes', 'message_from_file',
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002303 'message_from_string', 'mime', 'parser',
2304 'quoprimime', 'utils',
2305 ])
2306
2307 def test_formatdate(self):
2308 now = time.time()
2309 self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
2310 time.gmtime(now)[:6])
2311
2312 def test_formatdate_localtime(self):
2313 now = time.time()
2314 self.assertEqual(
2315 utils.parsedate(utils.formatdate(now, localtime=True))[:6],
2316 time.localtime(now)[:6])
2317
2318 def test_formatdate_usegmt(self):
2319 now = time.time()
2320 self.assertEqual(
2321 utils.formatdate(now, localtime=False),
2322 time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
2323 self.assertEqual(
2324 utils.formatdate(now, localtime=False, usegmt=True),
2325 time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
2326
2327 def test_parsedate_none(self):
2328 self.assertEqual(utils.parsedate(''), None)
2329
2330 def test_parsedate_compact(self):
2331 # The FWS after the comma is optional
2332 self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
2333 utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
2334
2335 def test_parsedate_no_dayofweek(self):
2336 eq = self.assertEqual
2337 eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
2338 (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
2339
2340 def test_parsedate_compact_no_dayofweek(self):
2341 eq = self.assertEqual
2342 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
2343 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2344
R. David Murray4a62e892010-12-23 20:35:46 +00002345 def test_parsedate_no_space_before_positive_offset(self):
2346 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
2347 (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
2348
2349 def test_parsedate_no_space_before_negative_offset(self):
2350 # Issue 1155362: we already handled '+' for this case.
2351 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
2352 (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))
2353
2354
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002355 def test_parsedate_acceptable_to_time_functions(self):
2356 eq = self.assertEqual
2357 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
2358 t = int(time.mktime(timetup))
2359 eq(time.localtime(t)[:6], timetup[:6])
2360 eq(int(time.strftime('%Y', timetup)), 2003)
2361 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
2362 t = int(time.mktime(timetup[:9]))
2363 eq(time.localtime(t)[:6], timetup[:6])
2364 eq(int(time.strftime('%Y', timetup[:9])), 2003)
2365
R. David Murray219d1c82010-08-25 00:45:55 +00002366 def test_parsedate_y2k(self):
2367 """Test for parsing a date with a two-digit year.
2368
2369 Parsing a date with a two-digit year should return the correct
2370 four-digit year. RFC822 allows two-digit years, but RFC2822 (which
2371 obsoletes RFC822) requires four-digit years.
2372
2373 """
2374 self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
2375 utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
2376 self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
2377 utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
2378
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002379 def test_parseaddr_empty(self):
2380 self.assertEqual(utils.parseaddr('<>'), ('', ''))
2381 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
2382
2383 def test_noquote_dump(self):
2384 self.assertEqual(
2385 utils.formataddr(('A Silly Person', 'person@dom.ain')),
2386 'A Silly Person <person@dom.ain>')
2387
2388 def test_escape_dump(self):
2389 self.assertEqual(
2390 utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
2391 r'"A \(Very\) Silly Person" <person@dom.ain>')
2392 a = r'A \(Special\) Person'
2393 b = 'person@dom.ain'
2394 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2395
2396 def test_escape_backslashes(self):
2397 self.assertEqual(
2398 utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
2399 r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
2400 a = r'Arthur \Backslash\ Foobar'
2401 b = 'person@dom.ain'
2402 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2403
2404 def test_name_with_dot(self):
2405 x = 'John X. Doe <jxd@example.com>'
2406 y = '"John X. Doe" <jxd@example.com>'
2407 a, b = ('John X. Doe', 'jxd@example.com')
2408 self.assertEqual(utils.parseaddr(x), (a, b))
2409 self.assertEqual(utils.parseaddr(y), (a, b))
2410 # formataddr() quotes the name if there's a dot in it
2411 self.assertEqual(utils.formataddr((a, b)), y)
2412
R. David Murray5397e862010-10-02 15:58:26 +00002413 def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
2414 # issue 10005. Note that in the third test the second pair of
2415 # backslashes is not actually a quoted pair because it is not inside a
2416 # comment or quoted string: the address being parsed has a quoted
2417 # string containing a quoted backslash, followed by 'example' and two
2418 # backslashes, followed by another quoted string containing a space and
2419 # the word 'example'. parseaddr copies those two backslashes
2420 # literally. Per rfc5322 this is not technically correct since a \ may
2421 # not appear in an address outside of a quoted string. It is probably
2422 # a sensible Postel interpretation, though.
2423 eq = self.assertEqual
2424 eq(utils.parseaddr('""example" example"@example.com'),
2425 ('', '""example" example"@example.com'))
2426 eq(utils.parseaddr('"\\"example\\" example"@example.com'),
2427 ('', '"\\"example\\" example"@example.com'))
2428 eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
2429 ('', '"\\\\"example\\\\" example"@example.com'))
2430
R. David Murray63563cd2010-12-18 18:25:38 +00002431 def test_parseaddr_preserves_spaces_in_local_part(self):
2432 # issue 9286. A normal RFC5322 local part should not contain any
2433 # folding white space, but legacy local parts can (they are a sequence
2434 # of atoms, not dotatoms). On the other hand we strip whitespace from
2435 # before the @ and around dots, on the assumption that the whitespace
2436 # around the punctuation is a mistake in what would otherwise be
2437 # an RFC5322 local part. Leading whitespace is, usual, stripped as well.
2438 self.assertEqual(('', "merwok wok@xample.com"),
2439 utils.parseaddr("merwok wok@xample.com"))
2440 self.assertEqual(('', "merwok wok@xample.com"),
2441 utils.parseaddr("merwok wok@xample.com"))
2442 self.assertEqual(('', "merwok wok@xample.com"),
2443 utils.parseaddr(" merwok wok @xample.com"))
2444 self.assertEqual(('', 'merwok"wok" wok@xample.com'),
2445 utils.parseaddr('merwok"wok" wok@xample.com'))
2446 self.assertEqual(('', 'merwok.wok.wok@xample.com'),
2447 utils.parseaddr('merwok. wok . wok@xample.com'))
2448
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002449 def test_multiline_from_comment(self):
2450 x = """\
2451Foo
2452\tBar <foo@example.com>"""
2453 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
2454
2455 def test_quote_dump(self):
2456 self.assertEqual(
2457 utils.formataddr(('A Silly; Person', 'person@dom.ain')),
2458 r'"A Silly; Person" <person@dom.ain>')
2459
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002460 def test_charset_richcomparisons(self):
2461 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002462 ne = self.assertNotEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002463 cset1 = Charset()
2464 cset2 = Charset()
2465 eq(cset1, 'us-ascii')
2466 eq(cset1, 'US-ASCII')
2467 eq(cset1, 'Us-AsCiI')
2468 eq('us-ascii', cset1)
2469 eq('US-ASCII', cset1)
2470 eq('Us-AsCiI', cset1)
2471 ne(cset1, 'usascii')
2472 ne(cset1, 'USASCII')
2473 ne(cset1, 'UsAsCiI')
2474 ne('usascii', cset1)
2475 ne('USASCII', cset1)
2476 ne('UsAsCiI', cset1)
2477 eq(cset1, cset2)
2478 eq(cset2, cset1)
2479
2480 def test_getaddresses(self):
2481 eq = self.assertEqual
2482 eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
2483 'Bud Person <bperson@dom.ain>']),
2484 [('Al Person', 'aperson@dom.ain'),
2485 ('Bud Person', 'bperson@dom.ain')])
2486
2487 def test_getaddresses_nasty(self):
2488 eq = self.assertEqual
2489 eq(utils.getaddresses(['foo: ;']), [('', '')])
2490 eq(utils.getaddresses(
2491 ['[]*-- =~$']),
2492 [('', ''), ('', ''), ('', '*--')])
2493 eq(utils.getaddresses(
2494 ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
2495 [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
2496
2497 def test_getaddresses_embedded_comment(self):
2498 """Test proper handling of a nested comment"""
2499 eq = self.assertEqual
2500 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
2501 eq(addrs[0][1], 'foo@bar.com')
2502
2503 def test_utils_quote_unquote(self):
2504 eq = self.assertEqual
2505 msg = Message()
2506 msg.add_header('content-disposition', 'attachment',
2507 filename='foo\\wacky"name')
2508 eq(msg.get_filename(), 'foo\\wacky"name')
2509
2510 def test_get_body_encoding_with_bogus_charset(self):
2511 charset = Charset('not a charset')
2512 self.assertEqual(charset.get_body_encoding(), 'base64')
2513
2514 def test_get_body_encoding_with_uppercase_charset(self):
2515 eq = self.assertEqual
2516 msg = Message()
2517 msg['Content-Type'] = 'text/plain; charset=UTF-8'
2518 eq(msg['content-type'], 'text/plain; charset=UTF-8')
2519 charsets = msg.get_charsets()
2520 eq(len(charsets), 1)
2521 eq(charsets[0], 'utf-8')
2522 charset = Charset(charsets[0])
2523 eq(charset.get_body_encoding(), 'base64')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00002524 msg.set_payload(b'hello world', charset=charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002525 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
2526 eq(msg.get_payload(decode=True), b'hello world')
2527 eq(msg['content-transfer-encoding'], 'base64')
2528 # Try another one
2529 msg = Message()
2530 msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
2531 charsets = msg.get_charsets()
2532 eq(len(charsets), 1)
2533 eq(charsets[0], 'us-ascii')
2534 charset = Charset(charsets[0])
2535 eq(charset.get_body_encoding(), encoders.encode_7or8bit)
2536 msg.set_payload('hello world', charset=charset)
2537 eq(msg.get_payload(), 'hello world')
2538 eq(msg['content-transfer-encoding'], '7bit')
2539
2540 def test_charsets_case_insensitive(self):
2541 lc = Charset('us-ascii')
2542 uc = Charset('US-ASCII')
2543 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
2544
2545 def test_partial_falls_inside_message_delivery_status(self):
2546 eq = self.ndiffAssertEqual
2547 # The Parser interface provides chunks of data to FeedParser in 8192
2548 # byte gulps. SF bug #1076485 found one of those chunks inside
2549 # message/delivery-status header block, which triggered an
2550 # unreadline() of NeedMoreData.
2551 msg = self._msgobj('msg_43.txt')
2552 sfp = StringIO()
2553 iterators._structure(msg, sfp)
2554 eq(sfp.getvalue(), """\
2555multipart/report
2556 text/plain
2557 message/delivery-status
2558 text/plain
2559 text/plain
2560 text/plain
2561 text/plain
2562 text/plain
2563 text/plain
2564 text/plain
2565 text/plain
2566 text/plain
2567 text/plain
2568 text/plain
2569 text/plain
2570 text/plain
2571 text/plain
2572 text/plain
2573 text/plain
2574 text/plain
2575 text/plain
2576 text/plain
2577 text/plain
2578 text/plain
2579 text/plain
2580 text/plain
2581 text/plain
2582 text/plain
2583 text/plain
2584 text/rfc822-headers
2585""")
2586
R. David Murraya0b44b52010-12-02 21:47:19 +00002587 def test_make_msgid_domain(self):
2588 self.assertEqual(
2589 email.utils.make_msgid(domain='testdomain-string')[-19:],
2590 '@testdomain-string>')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002591
Ezio Melottib3aedd42010-11-20 19:04:17 +00002592
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002593# Test the iterator/generators
class TestIterators(TestEmailBase):
    """Tests for the email.iterators helpers and the feed parser line buffer."""

    def test_body_line_iterator(self):
        """body_line_iterator() yields every body line, in order."""
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # First a simple non-multipart message
        msg = self._msgobj('msg_01.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 6)
        neq(EMPTYSTRING.join(lines), msg.get_payload())
        # Now a more complicated multipart
        msg = self._msgobj('msg_02.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 43)
        with openfile('msg_19.txt') as fp:
            neq(EMPTYSTRING.join(lines), fp.read())

    def test_typed_subpart_iterator(self):
        """typed_subpart_iterator(msg, 'text') finds both text parts."""
        msg = self._msgobj('msg_04.txt')
        payloads = [part.get_payload()
                    for part in iterators.typed_subpart_iterator(msg, 'text')]
        self.assertEqual(len(payloads), 2)
        self.assertEqual(EMPTYSTRING.join(payloads), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        """typed_subpart_iterator(msg, 'text', 'plain') finds the one part."""
        msg = self._msgobj('msg_03.txt')
        payloads = [
            part.get_payload()
            for part in iterators.typed_subpart_iterator(msg, 'text', 'plain')]
        self.assertEqual(len(payloads), 1)
        self.assertEqual(EMPTYSTRING.join(payloads), """\

Hi,

Do you like this message?

-Me
""")

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
           line endings.  A CR ending one push() followed by a LF starting
           the next push() added an empty line.
        '''
        # (chunk pushed, number of complete lines readable right after it)
        imt = [
            ("a\r \n", 2),
            ("b", 0),
            ("c\n", 1),
            ("", 0),
            ("d\r\n", 1),
            ("e\r", 0),
            ("\nf", 1),
            ("\r\n", 1),
            ]
        from email.feedparser import BufferedSubFile, NeedMoreData
        bsf = BufferedSubFile()
        om = []
        nt = 0
        for il, n in imt:
            bsf.push(il)
            nt += n
            # Drain every line that is complete so far; readline() hands
            # back NeedMoreData once nothing complete is left.
            drained = list(iter(bsf.readline, NeedMoreData))
            om.extend(drained)
            # assertEqual (rather than assertTrue(a == b)) so that a failure
            # reports both counts.
            self.assertEqual(n, len(drained))
        self.assertEqual(len(om), nt)
        self.assertEqual(''.join(il for il, n in imt), ''.join(om))
2680
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002681
Ezio Melottib3aedd42010-11-20 19:04:17 +00002682
class TestParsers(TestEmailBase):
    """Tests for Parser, HeaderParser and email.message_from_string()."""

    # No limit on the length of the diffs unittest prints for failed
    # comparisons; several tests below compare whole messages.
    maxDiff = None

    def test_header_parser(self):
        """HeaderParser reads the headers and keeps the body as one string."""
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt') as fp:
            msg = HeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)

    def test_whitespace_continuation(self):
        """A whitespace-only line after a header continues that header."""
        eq = self.assertEqual
        # This message contains a line after the Subject: header that has only
        # whitespace, but it is not empty!
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: the next line has a space on it
\x20
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        """Same as above, with the continued header last in the block."""
        eq = self.assertEqual
        # Like the previous test, but the subject line is the last
        # header.
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam
Subject: the next line has a space on it
\x20

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        """A CRLF-delimited multipart message splits into its two parts."""
        eq = self.assertEqual
        with openfile('msg_26.txt', newline='\n') as fp:
            msg = Parser().parse(fp)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'text/plain')
        eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'application/riscos')

    def test_crlf_flatten(self):
        """Flattening with linesep='\\r\\n' reproduces the CRLF source text."""
        # Using newline='\n' preserves the crlfs in this input file.
        with openfile('msg_26.txt', newline='\n') as fp:
            text = fp.read()
        msg = email.message_from_string(text)
        s = StringIO()
        g = Generator(s)
        g.flatten(msg, linesep='\r\n')
        self.assertEqual(s.getvalue(), text)

    def test_multipart_digest_with_extra_mime_headers(self):
        """Each digest entry is a message/rfc822 wrapping one text/plain."""
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 2)
        # Both entries have the same shape and differ only in body text.
        for index, body in enumerate(('message 1\n', 'message 2\n')):
            wrapper = msg.get_payload(index)
            eq(wrapper.get_content_type(), 'message/rfc822')
            self.assertTrue(wrapper.is_multipart())
            eq(len(wrapper.get_payload()), 1)
            inner = wrapper.get_payload(0)
            self.assertFalse(inner.is_multipart())
            eq(inner.get_content_type(), 'text/plain')
            neq(inner.get_payload(), body)

    def test_three_lines(self):
        """Headers are still parsed when the text has no trailing newline."""
        # A bug report by Andrew McNamara
        lines = ['From: Andrew Person <aperson@dom.ain',
                 'Subject: Test',
                 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
        msg = email.message_from_string(NL.join(lines))
        self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        """Header values come back without their trailing CRLF."""
        eq = self.assertEqual
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        value1 = 'text'
        value2 = 'more text'
        m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            value1, value2)
        msg = email.message_from_string(m)
        eq(msg.get('Header'), value1)
        eq(msg.get('Next-Header'), value2)

    def test_rfc2822_header_syntax(self):
        """Unusual printable characters are accepted in header names."""
        eq = self.assertEqual
        m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg), 3)
        eq(sorted(field for field in msg), ['!"#QUX;~', '>From', 'From'])
        eq(msg.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        """A first line with a space before any colon yields no headers."""
        eq = self.assertEqual
        m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg.keys()), 0)

    def test_rfc2822_one_character_header(self):
        """Single-character header names are accepted."""
        eq = self.assertEqual
        m = 'A: first header\nB: second header\nCC: third header\n\nbody'
        msg = email.message_from_string(m)
        eq(sorted(msg.keys()), ['A', 'B', 'CC'])
        eq(msg.get_payload(), 'body')

    def test_CRLFLF_at_end_of_part(self):
        """A part body ending in CRLF keeps that CRLF."""
        # issue 5610: feedparser should not eat two chars from body part ending
        # with "\r\n\n".
        m = (
            "From: foo@bar.com\n"
            "To: baz\n"
            "Mime-Version: 1.0\n"
            "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
            "\n"
            "--BOUNDARY\n"
            "Content-Type: text/plain\n"
            "\n"
            "body ending with CRLF newline\r\n"
            "\n"
            "--BOUNDARY--\n"
          )
        msg = email.message_from_string(m)
        self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002845
Ezio Melottib3aedd42010-11-20 19:04:17 +00002846
R. David Murray96fd54e2010-10-08 15:55:28 +00002847class Test8BitBytesHandling(unittest.TestCase):
2848 # In Python3 all input is string, but that doesn't work if the actual input
2849 # uses an 8bit transfer encoding. To hack around that, in email 5.1 we
2850 # decode byte streams using the surrogateescape error handler, and
2851 # reconvert to binary at appropriate places if we detect surrogates. This
2852 # doesn't allow us to transform headers with 8bit bytes (they get munged),
2853 # but it does allow us to parse and preserve them, and to decode body
2854 # parts that use an 8bit CTE.
2855
2856 bodytest_msg = textwrap.dedent("""\
2857 From: foo@bar.com
2858 To: baz
2859 Mime-Version: 1.0
2860 Content-Type: text/plain; charset={charset}
2861 Content-Transfer-Encoding: {cte}
2862
2863 {bodyline}
2864 """)
2865
2866 def test_known_8bit_CTE(self):
2867 m = self.bodytest_msg.format(charset='utf-8',
2868 cte='8bit',
2869 bodyline='pöstal').encode('utf-8')
2870 msg = email.message_from_bytes(m)
2871 self.assertEqual(msg.get_payload(), "pöstal\n")
2872 self.assertEqual(msg.get_payload(decode=True),
2873 "pöstal\n".encode('utf-8'))
2874
2875 def test_unknown_8bit_CTE(self):
2876 m = self.bodytest_msg.format(charset='notavalidcharset',
2877 cte='8bit',
2878 bodyline='pöstal').encode('utf-8')
2879 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00002880 self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n")
R. David Murray96fd54e2010-10-08 15:55:28 +00002881 self.assertEqual(msg.get_payload(decode=True),
2882 "pöstal\n".encode('utf-8'))
2883
2884 def test_8bit_in_quopri_body(self):
2885 # This is non-RFC compliant data...without 'decode' the library code
2886 # decodes the body using the charset from the headers, and because the
2887 # source byte really is utf-8 this works. This is likely to fail
2888 # against real dirty data (ie: produce mojibake), but the data is
2889 # invalid anyway so it is as good a guess as any. But this means that
2890 # this test just confirms the current behavior; that behavior is not
2891 # necessarily the best possible behavior. With 'decode' it is
2892 # returning the raw bytes, so that test should be of correct behavior,
2893 # or at least produce the same result that email4 did.
2894 m = self.bodytest_msg.format(charset='utf-8',
2895 cte='quoted-printable',
2896 bodyline='p=C3=B6stál').encode('utf-8')
2897 msg = email.message_from_bytes(m)
2898 self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
2899 self.assertEqual(msg.get_payload(decode=True),
2900 'pöstál\n'.encode('utf-8'))
2901
2902 def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
2903 # This is similar to the previous test, but proves that if the 8bit
2904 # byte is undecodeable in the specified charset, it gets replaced
2905 # by the unicode 'unknown' character. Again, this may or may not
2906 # be the ideal behavior. Note that if decode=False none of the
2907 # decoders will get involved, so this is the only test we need
2908 # for this behavior.
2909 m = self.bodytest_msg.format(charset='ascii',
2910 cte='quoted-printable',
2911 bodyline='p=C3=B6stál').encode('utf-8')
2912 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00002913 self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
R. David Murray96fd54e2010-10-08 15:55:28 +00002914 self.assertEqual(msg.get_payload(decode=True),
2915 'pöstál\n'.encode('utf-8'))
2916
2917 def test_8bit_in_base64_body(self):
2918 # Sticking an 8bit byte in a base64 block makes it undecodable by
2919 # normal means, so the block is returned undecoded, but as bytes.
2920 m = self.bodytest_msg.format(charset='utf-8',
2921 cte='base64',
2922 bodyline='cMO2c3RhbAá=').encode('utf-8')
2923 msg = email.message_from_bytes(m)
2924 self.assertEqual(msg.get_payload(decode=True),
2925 'cMO2c3RhbAá=\n'.encode('utf-8'))
2926
2927 def test_8bit_in_uuencode_body(self):
2928 # Sticking an 8bit byte in a uuencode block makes it undecodable by
2929 # normal means, so the block is returned undecoded, but as bytes.
2930 m = self.bodytest_msg.format(charset='utf-8',
2931 cte='uuencode',
2932 bodyline='<,.V<W1A; á ').encode('utf-8')
2933 msg = email.message_from_bytes(m)
2934 self.assertEqual(msg.get_payload(decode=True),
2935 '<,.V<W1A; á \n'.encode('utf-8'))
2936
2937
R. David Murray92532142011-01-07 23:25:30 +00002938 headertest_headers = (
2939 ('From: foo@bar.com', ('From', 'foo@bar.com')),
2940 ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
2941 ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
2942 '\tJean de Baddie',
2943 ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
2944 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
2945 ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
2946 ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
2947 )
2948 headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) +
2949 '\nYes, they are flying.\n').encode('utf-8')
R. David Murray96fd54e2010-10-08 15:55:28 +00002950
2951 def test_get_8bit_header(self):
2952 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00002953 self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz')
2954 self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz')
R. David Murray96fd54e2010-10-08 15:55:28 +00002955
2956 def test_print_8bit_headers(self):
2957 msg = email.message_from_bytes(self.headertest_msg)
2958 self.assertEqual(str(msg),
R. David Murray92532142011-01-07 23:25:30 +00002959 textwrap.dedent("""\
2960 From: {}
2961 To: {}
2962 Subject: {}
2963 From: {}
2964
2965 Yes, they are flying.
2966 """).format(*[expected[1] for (_, expected) in
2967 self.headertest_headers]))
R. David Murray96fd54e2010-10-08 15:55:28 +00002968
2969 def test_values_with_8bit_headers(self):
2970 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00002971 self.assertListEqual([str(x) for x in msg.values()],
R. David Murray96fd54e2010-10-08 15:55:28 +00002972 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00002973 'b\uFFFD\uFFFDz',
2974 'Maintenant je vous pr\uFFFD\uFFFDsente mon '
2975 'coll\uFFFD\uFFFDgue, le pouf '
2976 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
R. David Murray96fd54e2010-10-08 15:55:28 +00002977 '\tJean de Baddie',
R. David Murray92532142011-01-07 23:25:30 +00002978 "g\uFFFD\uFFFDst"])
R. David Murray96fd54e2010-10-08 15:55:28 +00002979
2980 def test_items_with_8bit_headers(self):
2981 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00002982 self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()],
R. David Murray96fd54e2010-10-08 15:55:28 +00002983 [('From', 'foo@bar.com'),
R. David Murray92532142011-01-07 23:25:30 +00002984 ('To', 'b\uFFFD\uFFFDz'),
2985 ('Subject', 'Maintenant je vous '
2986 'pr\uFFFD\uFFFDsente '
2987 'mon coll\uFFFD\uFFFDgue, le pouf '
2988 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
2989 '\tJean de Baddie'),
2990 ('From', 'g\uFFFD\uFFFDst')])
R. David Murray96fd54e2010-10-08 15:55:28 +00002991
2992 def test_get_all_with_8bit_headers(self):
2993 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00002994 self.assertListEqual([str(x) for x in msg.get_all('from')],
R. David Murray96fd54e2010-10-08 15:55:28 +00002995 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00002996 'g\uFFFD\uFFFDst'])
R. David Murray96fd54e2010-10-08 15:55:28 +00002997
R David Murraya2150232011-03-16 21:11:23 -04002998 def test_get_content_type_with_8bit(self):
2999 msg = email.message_from_bytes(textwrap.dedent("""\
3000 Content-Type: text/pl\xA7in; charset=utf-8
3001 """).encode('latin-1'))
3002 self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin")
3003 self.assertEqual(msg.get_content_maintype(), "text")
3004 self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin")
3005
3006 def test_get_params_with_8bit(self):
3007 msg = email.message_from_bytes(
3008 'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1'))
3009 self.assertEqual(msg.get_params(header='x-header'),
3010 [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])
3011 self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne')
3012 # XXX: someday you might be able to get 'b\xa7r', for now you can't.
3013 self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None)
3014
3015 def test_get_rfc2231_params_with_8bit(self):
3016 msg = email.message_from_bytes(textwrap.dedent("""\
3017 Content-Type: text/plain; charset=us-ascii;
3018 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3019 ).encode('latin-1'))
3020 self.assertEqual(msg.get_param('title'),
3021 ('us-ascii', 'en', 'This is not f\uFFFDn'))
3022
3023 def test_set_rfc2231_params_with_8bit(self):
3024 msg = email.message_from_bytes(textwrap.dedent("""\
3025 Content-Type: text/plain; charset=us-ascii;
3026 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3027 ).encode('latin-1'))
3028 msg.set_param('title', 'test')
3029 self.assertEqual(msg.get_param('title'), 'test')
3030
3031 def test_del_rfc2231_params_with_8bit(self):
3032 msg = email.message_from_bytes(textwrap.dedent("""\
3033 Content-Type: text/plain; charset=us-ascii;
3034 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3035 ).encode('latin-1'))
3036 msg.del_param('title')
3037 self.assertEqual(msg.get_param('title'), None)
3038 self.assertEqual(msg.get_content_maintype(), 'text')
3039
3040 def test_get_payload_with_8bit_cte_header(self):
3041 msg = email.message_from_bytes(textwrap.dedent("""\
3042 Content-Transfer-Encoding: b\xa7se64
3043 Content-Type: text/plain; charset=latin-1
3044
3045 payload
3046 """).encode('latin-1'))
3047 self.assertEqual(msg.get_payload(), 'payload\n')
3048 self.assertEqual(msg.get_payload(decode=True), b'payload\n')
3049
R. David Murray96fd54e2010-10-08 15:55:28 +00003050 non_latin_bin_msg = textwrap.dedent("""\
3051 From: foo@bar.com
3052 To: báz
3053 Subject: Maintenant je vous présente mon collègue, le pouf célèbre
3054 \tJean de Baddie
3055 Mime-Version: 1.0
3056 Content-Type: text/plain; charset="utf-8"
3057 Content-Transfer-Encoding: 8bit
3058
3059 Да, они летят.
3060 """).encode('utf-8')
3061
3062 def test_bytes_generator(self):
3063 msg = email.message_from_bytes(self.non_latin_bin_msg)
3064 out = BytesIO()
3065 email.generator.BytesGenerator(out).flatten(msg)
3066 self.assertEqual(out.getvalue(), self.non_latin_bin_msg)
3067
R. David Murray7372a072011-01-26 21:21:32 +00003068 def test_bytes_generator_handles_None_body(self):
3069 #Issue 11019
3070 msg = email.message.Message()
3071 out = BytesIO()
3072 email.generator.BytesGenerator(out).flatten(msg)
3073 self.assertEqual(out.getvalue(), b"\n")
3074
R. David Murray92532142011-01-07 23:25:30 +00003075 non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\
R. David Murray96fd54e2010-10-08 15:55:28 +00003076 From: foo@bar.com
R. David Murray92532142011-01-07 23:25:30 +00003077 To: =?unknown-8bit?q?b=C3=A1z?=
3078 Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=
3079 =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=
3080 =?unknown-8bit?q?_Jean_de_Baddie?=
R. David Murray96fd54e2010-10-08 15:55:28 +00003081 Mime-Version: 1.0
3082 Content-Type: text/plain; charset="utf-8"
3083 Content-Transfer-Encoding: base64
3084
3085 0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
3086 """)
3087
3088 def test_generator_handles_8bit(self):
3089 msg = email.message_from_bytes(self.non_latin_bin_msg)
3090 out = StringIO()
3091 email.generator.Generator(out).flatten(msg)
R. David Murray92532142011-01-07 23:25:30 +00003092 self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped)
R. David Murray96fd54e2010-10-08 15:55:28 +00003093
3094 def test_bytes_generator_with_unix_from(self):
3095 # The unixfrom contains a current date, so we can't check it
3096 # literally. Just make sure the first word is 'From' and the
3097 # rest of the message matches the input.
3098 msg = email.message_from_bytes(self.non_latin_bin_msg)
3099 out = BytesIO()
3100 email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
3101 lines = out.getvalue().split(b'\n')
3102 self.assertEqual(lines[0].split()[0], b'From')
3103 self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)
3104
R. David Murray92532142011-01-07 23:25:30 +00003105 non_latin_bin_msg_as7bit = non_latin_bin_msg_as7bit_wrapped.split('\n')
3106 non_latin_bin_msg_as7bit[2:4] = [
3107 'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
3108 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
3109 non_latin_bin_msg_as7bit = '\n'.join(non_latin_bin_msg_as7bit)
3110
R. David Murray96fd54e2010-10-08 15:55:28 +00003111 def test_message_from_binary_file(self):
3112 fn = 'test.msg'
3113 self.addCleanup(unlink, fn)
3114 with open(fn, 'wb') as testfile:
3115 testfile.write(self.non_latin_bin_msg)
Brett Cannon384917a2010-10-29 23:08:36 +00003116 with open(fn, 'rb') as testfile:
3117 m = email.parser.BytesParser().parse(testfile)
R. David Murray96fd54e2010-10-08 15:55:28 +00003118 self.assertEqual(str(m), self.non_latin_bin_msg_as7bit)
3119
3120 latin_bin_msg = textwrap.dedent("""\
3121 From: foo@bar.com
3122 To: Dinsdale
3123 Subject: Nudge nudge, wink, wink
3124 Mime-Version: 1.0
3125 Content-Type: text/plain; charset="latin-1"
3126 Content-Transfer-Encoding: 8bit
3127
3128 oh là là, know what I mean, know what I mean?
3129 """).encode('latin-1')
3130
3131 latin_bin_msg_as7bit = textwrap.dedent("""\
3132 From: foo@bar.com
3133 To: Dinsdale
3134 Subject: Nudge nudge, wink, wink
3135 Mime-Version: 1.0
3136 Content-Type: text/plain; charset="iso-8859-1"
3137 Content-Transfer-Encoding: quoted-printable
3138
3139 oh l=E0 l=E0, know what I mean, know what I mean?
3140 """)
3141
3142 def test_string_generator_reencodes_to_quopri_when_appropriate(self):
3143 m = email.message_from_bytes(self.latin_bin_msg)
3144 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3145
3146 def test_decoded_generator_emits_unicode_body(self):
3147 m = email.message_from_bytes(self.latin_bin_msg)
3148 out = StringIO()
3149 email.generator.DecodedGenerator(out).flatten(m)
3150 #DecodedHeader output contains an extra blank line compared
3151 #to the input message. RDM: not sure if this is a bug or not,
3152 #but it is not specific to the 8bit->7bit conversion.
3153 self.assertEqual(out.getvalue(),
3154 self.latin_bin_msg.decode('latin-1')+'\n')
3155
3156 def test_bytes_feedparser(self):
3157 bfp = email.feedparser.BytesFeedParser()
3158 for i in range(0, len(self.latin_bin_msg), 10):
3159 bfp.feed(self.latin_bin_msg[i:i+10])
3160 m = bfp.close()
3161 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3162
R. David Murray8451c4b2010-10-23 22:19:56 +00003163 def test_crlf_flatten(self):
3164 with openfile('msg_26.txt', 'rb') as fp:
3165 text = fp.read()
3166 msg = email.message_from_bytes(text)
3167 s = BytesIO()
3168 g = email.generator.BytesGenerator(s)
3169 g.flatten(msg, linesep='\r\n')
3170 self.assertEqual(s.getvalue(), text)
R David Murrayc5c14722011-04-06 08:13:02 -04003171
3172 def test_8bit_multipart(self):
3173 # Issue 11605
3174 source = textwrap.dedent("""\
3175 Date: Fri, 18 Mar 2011 17:15:43 +0100
3176 To: foo@example.com
3177 From: foodwatch-Newsletter <bar@example.com>
3178 Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System
3179 Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain>
3180 MIME-Version: 1.0
3181 Content-Type: multipart/alternative;
3182 boundary="b1_76a486bee62b0d200f33dc2ca08220ad"
3183
3184 --b1_76a486bee62b0d200f33dc2ca08220ad
3185 Content-Type: text/plain; charset="utf-8"
3186 Content-Transfer-Encoding: 8bit
3187
3188 Guten Tag, ,
3189
3190 mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die
3191 Nachrichten aus Japan.
3192
3193
3194 --b1_76a486bee62b0d200f33dc2ca08220ad
3195 Content-Type: text/html; charset="utf-8"
3196 Content-Transfer-Encoding: 8bit
3197
3198 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
3199 "http://www.w3.org/TR/html4/loose.dtd">
3200 <html lang="de">
3201 <head>
3202 <title>foodwatch - Newsletter</title>
3203 </head>
3204 <body>
3205 <p>mit gro&szlig;er Betroffenheit verfolgen auch wir im foodwatch-Team
3206 die Nachrichten aus Japan.</p>
3207 </body>
3208 </html>
3209 --b1_76a486bee62b0d200f33dc2ca08220ad--
3210
3211 """).encode('utf-8')
3212 msg = email.message_from_bytes(source)
3213 s = BytesIO()
3214 g = email.generator.BytesGenerator(s)
3215 g.flatten(msg)
3216 self.assertEqual(s.getvalue(), source)
3217
R. David Murray8451c4b2010-10-23 22:19:56 +00003218 maxDiff = None
3219
Ezio Melottib3aedd42010-11-20 19:04:17 +00003220
class BaseTestBytesGeneratorIdempotent:
    # Mixin with bytes-based helpers.  It reads self.linesep, self.blinesep
    # and self.normalize_linesep_regex, which it does not define itself.

    maxDiff = None

    def _msgobj(self, filename):
        # Return (message, raw bytes) for a data file, after rewriting the
        # line endings matched by normalize_linesep_regex to blinesep.
        with openfile(filename, 'rb') as fp:
            raw = fp.read()
        normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(normalized), normalized

    def _idempotent(self, msg, data, unixfrom=False):
        # Flatten msg with maxheaderlen=0 and this class's linesep, then
        # compare the result with the original bytes.
        sink = BytesIO()
        generator = email.generator.BytesGenerator(sink, maxheaderlen=0)
        generator.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertByteStringsEqual(data, sink.getvalue())

    def assertByteStringsEqual(self, str1, str2):
        # Not using self.blinesep here is intentional.  This way the output
        # is more useful when the failure results in mixed line endings.
        newline = b'\n'
        self.assertListEqual(str1.split(newline), str2.split(newline))
3242
3243
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                     TestIdempotent):
    # LF variant: every CRLF pair in the input data is rewritten to a bare
    # LF before parsing, and the message is flattened with '\n'.
    normalize_linesep_regex = re.compile(br'\r\n')
    blinesep = b'\n'
    linesep = '\n'
3249
3250
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    # CRLF variant: the regex matches an LF that is not already preceded
    # by a CR, so existing CRLF pairs are left alone and bare LFs become
    # CRLF; the message is flattened with '\r\n'.
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
    blinesep = b'\r\n'
    linesep = '\r\n'
3256
Ezio Melottib3aedd42010-11-20 19:04:17 +00003257
class TestBase64(unittest.TestCase):
    def test_len(self):
        eq = self.assertEqual
        eq(base64mime.header_length('hello'),
           len(base64mime.body_encode(b'hello', eol='')))
        # Expected encoded length for input sizes 0..14: every started
        # group of three input characters costs four output characters.
        expected_sizes = [0] + [4] * 3 + [8] * 3 + [12] * 3 + [16] * 3 + [20] * 2
        for size, bsize in enumerate(expected_sizes):
            eq(base64mime.header_length('x' * size), bsize)

    def test_decode(self):
        eq = self.assertEqual
        eq(base64mime.decode(''), b'')
        eq(base64mime.decode('aGVsbG8='), b'hello')

    def test_encode(self):
        eq = self.assertEqual
        eq(base64mime.body_encode(b''), b'')
        eq(base64mime.body_encode(b'hello'), 'aGVsbG8=\n')
        # An embedded newline is encoded like any other byte
        eq(base64mime.body_encode(b'hello\n'), 'aGVsbG8K\n')
        # Test the maxlinelen arg
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40), """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IA==
""")
        # Test the eol argument
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IA==\r
""")

    def test_header_encode(self):
        eq = self.assertEqual
        he = base64mime.header_encode
        eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        # Line breaks inside the header text are encoded, not emitted
        eq(he('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # Test the charset option
        eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003308
3309
Ezio Melottib3aedd42010-11-20 19:04:17 +00003310
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003311class TestQuopri(unittest.TestCase):
3312 def setUp(self):
3313 # Set of characters (as byte integers) that don't need to be encoded
3314 # in headers.
3315 self.hlit = list(chain(
3316 range(ord('a'), ord('z') + 1),
3317 range(ord('A'), ord('Z') + 1),
3318 range(ord('0'), ord('9') + 1),
Guido van Rossum9604e662007-08-30 03:46:43 +00003319 (c for c in b'!*+-/')))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003320 # Set of characters (as byte integers) that do need to be encoded in
3321 # headers.
3322 self.hnon = [c for c in range(256) if c not in self.hlit]
3323 assert len(self.hlit) + len(self.hnon) == 256
3324 # Set of characters (as byte integers) that don't need to be encoded
3325 # in bodies.
3326 self.blit = list(range(ord(' '), ord('~') + 1))
3327 self.blit.append(ord('\t'))
3328 self.blit.remove(ord('='))
3329 # Set of characters (as byte integers) that do need to be encoded in
3330 # bodies.
3331 self.bnon = [c for c in range(256) if c not in self.blit]
3332 assert len(self.blit) + len(self.bnon) == 256
3333
Guido van Rossum9604e662007-08-30 03:46:43 +00003334 def test_quopri_header_check(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003335 for c in self.hlit:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003336 self.assertFalse(quoprimime.header_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003337 'Should not be header quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003338 for c in self.hnon:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003339 self.assertTrue(quoprimime.header_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003340 'Should be header quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003341
Guido van Rossum9604e662007-08-30 03:46:43 +00003342 def test_quopri_body_check(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003343 for c in self.blit:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003344 self.assertFalse(quoprimime.body_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003345 'Should not be body quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003346 for c in self.bnon:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003347 self.assertTrue(quoprimime.body_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003348 'Should be body quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003349
3350 def test_header_quopri_len(self):
3351 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00003352 eq(quoprimime.header_length(b'hello'), 5)
3353 # RFC 2047 chrome is not included in header_length().
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003354 eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
Guido van Rossum9604e662007-08-30 03:46:43 +00003355 quoprimime.header_length(b'hello') +
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003356 # =?xxx?q?...?= means 10 extra characters
3357 10)
Guido van Rossum9604e662007-08-30 03:46:43 +00003358 eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
3359 # RFC 2047 chrome is not included in header_length().
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003360 eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
Guido van Rossum9604e662007-08-30 03:46:43 +00003361 quoprimime.header_length(b'h@e@l@l@o@') +
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003362 # =?xxx?q?...?= means 10 extra characters
3363 10)
3364 for c in self.hlit:
Guido van Rossum9604e662007-08-30 03:46:43 +00003365 eq(quoprimime.header_length(bytes([c])), 1,
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003366 'expected length 1 for %r' % chr(c))
3367 for c in self.hnon:
Guido van Rossum9604e662007-08-30 03:46:43 +00003368 # Space is special; it's encoded to _
3369 if c == ord(' '):
3370 continue
3371 eq(quoprimime.header_length(bytes([c])), 3,
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003372 'expected length 3 for %r' % chr(c))
Guido van Rossum9604e662007-08-30 03:46:43 +00003373 eq(quoprimime.header_length(b' '), 1)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003374
3375 def test_body_quopri_len(self):
3376 eq = self.assertEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003377 for c in self.blit:
Guido van Rossum9604e662007-08-30 03:46:43 +00003378 eq(quoprimime.body_length(bytes([c])), 1)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003379 for c in self.bnon:
Guido van Rossum9604e662007-08-30 03:46:43 +00003380 eq(quoprimime.body_length(bytes([c])), 3)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003381
3382 def test_quote_unquote_idempotent(self):
3383 for x in range(256):
3384 c = chr(x)
3385 self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)
3386
R David Murrayec1b5b82011-03-23 14:19:05 -04003387 def _test_header_encode(self, header, expected_encoded_header, charset=None):
3388 if charset is None:
3389 encoded_header = quoprimime.header_encode(header)
3390 else:
3391 encoded_header = quoprimime.header_encode(header, charset)
3392 self.assertEqual(encoded_header, expected_encoded_header)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003393
R David Murraycafd79d2011-03-23 15:25:55 -04003394 def test_header_encode_null(self):
3395 self._test_header_encode(b'', '')
3396
R David Murrayec1b5b82011-03-23 14:19:05 -04003397 def test_header_encode_one_word(self):
3398 self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=')
3399
3400 def test_header_encode_two_lines(self):
3401 self._test_header_encode(b'hello\nworld',
3402 '=?iso-8859-1?q?hello=0Aworld?=')
3403
3404 def test_header_encode_non_ascii(self):
3405 self._test_header_encode(b'hello\xc7there',
3406 '=?iso-8859-1?q?hello=C7there?=')
3407
3408 def test_header_encode_alt_charset(self):
3409 self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
3410 charset='iso-8859-2')
3411
3412 def _test_header_decode(self, encoded_header, expected_decoded_header):
3413 decoded_header = quoprimime.header_decode(encoded_header)
3414 self.assertEqual(decoded_header, expected_decoded_header)
3415
3416 def test_header_decode_null(self):
3417 self._test_header_decode('', '')
3418
3419 def test_header_decode_one_word(self):
3420 self._test_header_decode('hello', 'hello')
3421
3422 def test_header_decode_two_lines(self):
3423 self._test_header_decode('hello=0Aworld', 'hello\nworld')
3424
3425 def test_header_decode_non_ascii(self):
3426 self._test_header_decode('hello=C7there', 'hello\xc7there')
3427
3428 def _test_decode(self, encoded, expected_decoded, eol=None):
3429 if eol is None:
3430 decoded = quoprimime.decode(encoded)
3431 else:
3432 decoded = quoprimime.decode(encoded, eol=eol)
3433 self.assertEqual(decoded, expected_decoded)
3434
3435 def test_decode_null_word(self):
3436 self._test_decode('', '')
3437
3438 def test_decode_null_line_null_word(self):
3439 self._test_decode('\r\n', '\n')
3440
3441 def test_decode_one_word(self):
3442 self._test_decode('hello', 'hello')
3443
3444 def test_decode_one_word_eol(self):
3445 self._test_decode('hello', 'hello', eol='X')
3446
3447 def test_decode_one_line(self):
3448 self._test_decode('hello\r\n', 'hello\n')
3449
3450 def test_decode_one_line_lf(self):
3451 self._test_decode('hello\n', 'hello\n')
3452
R David Murraycafd79d2011-03-23 15:25:55 -04003453 def test_decode_one_line_cr(self):
3454 self._test_decode('hello\r', 'hello\n')
3455
3456 def test_decode_one_line_nl(self):
3457 self._test_decode('hello\n', 'helloX', eol='X')
3458
3459 def test_decode_one_line_crnl(self):
3460 self._test_decode('hello\r\n', 'helloX', eol='X')
3461
R David Murrayec1b5b82011-03-23 14:19:05 -04003462 def test_decode_one_line_one_word(self):
3463 self._test_decode('hello\r\nworld', 'hello\nworld')
3464
3465 def test_decode_one_line_one_word_eol(self):
3466 self._test_decode('hello\r\nworld', 'helloXworld', eol='X')
3467
3468 def test_decode_two_lines(self):
3469 self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n')
3470
R David Murraycafd79d2011-03-23 15:25:55 -04003471 def test_decode_two_lines_eol(self):
3472 self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X')
3473
R David Murrayec1b5b82011-03-23 14:19:05 -04003474 def test_decode_one_long_line(self):
3475 self._test_decode('Spam' * 250, 'Spam' * 250)
3476
3477 def test_decode_one_space(self):
3478 self._test_decode(' ', '')
3479
3480 def test_decode_multiple_spaces(self):
3481 self._test_decode(' ' * 5, '')
3482
3483 def test_decode_one_line_trailing_spaces(self):
3484 self._test_decode('hello \r\n', 'hello\n')
3485
3486 def test_decode_two_lines_trailing_spaces(self):
3487 self._test_decode('hello \r\nworld \r\n', 'hello\nworld\n')
3488
3489 def test_decode_quoted_word(self):
3490 self._test_decode('=22quoted=20words=22', '"quoted words"')
3491
3492 def test_decode_uppercase_quoting(self):
3493 self._test_decode('ab=CD=EF', 'ab\xcd\xef')
3494
3495 def test_decode_lowercase_quoting(self):
3496 self._test_decode('ab=cd=ef', 'ab\xcd\xef')
3497
3498 def test_decode_soft_line_break(self):
3499 self._test_decode('soft line=\r\nbreak', 'soft linebreak')
3500
3501 def test_decode_false_quoting(self):
3502 self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2')
3503
3504 def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
3505 kwargs = {}
3506 if maxlinelen is None:
3507 # Use body_encode's default.
3508 maxlinelen = 76
3509 else:
3510 kwargs['maxlinelen'] = maxlinelen
3511 if eol is None:
3512 # Use body_encode's default.
3513 eol = '\n'
3514 else:
3515 kwargs['eol'] = eol
3516 encoded_body = quoprimime.body_encode(body, **kwargs)
3517 self.assertEqual(encoded_body, expected_encoded_body)
3518 if eol == '\n' or eol == '\r\n':
3519 # We know how to split the result back into lines, so maxlinelen
3520 # can be checked.
3521 for line in encoded_body.splitlines():
3522 self.assertLessEqual(len(line), maxlinelen)
3523
3524 def test_encode_null(self):
3525 self._test_encode('', '')
3526
3527 def test_encode_null_lines(self):
3528 self._test_encode('\n\n', '\n\n')
3529
3530 def test_encode_one_line(self):
3531 self._test_encode('hello\n', 'hello\n')
3532
3533 def test_encode_one_line_crlf(self):
3534 self._test_encode('hello\r\n', 'hello\n')
3535
3536 def test_encode_one_line_eol(self):
3537 self._test_encode('hello\n', 'hello\r\n', eol='\r\n')
3538
3539 def test_encode_one_space(self):
3540 self._test_encode(' ', '=20')
3541
3542 def test_encode_one_line_one_space(self):
3543 self._test_encode(' \n', '=20\n')
3544
# XXX: body_encode() expects strings, but uses ord(char) from these strings
# to index into a 256-entry list.  For code points above 255, this will fail.
# Should there be a check for 8-bit only ord() values in body, or at least
# a comment about the expected input?
3549
3550 def test_encode_two_lines_one_space(self):
3551 self._test_encode(' \n \n', '=20\n=20\n')
3552
R David Murrayec1b5b82011-03-23 14:19:05 -04003553 def test_encode_one_word_trailing_spaces(self):
3554 self._test_encode('hello ', 'hello =20')
3555
3556 def test_encode_one_line_trailing_spaces(self):
3557 self._test_encode('hello \n', 'hello =20\n')
3558
3559 def test_encode_one_word_trailing_tab(self):
3560 self._test_encode('hello \t', 'hello =09')
3561
3562 def test_encode_one_line_trailing_tab(self):
3563 self._test_encode('hello \t\n', 'hello =09\n')
3564
3565 def test_encode_trailing_space_before_maxlinelen(self):
3566 self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6)
3567
R David Murrayb938c8c2011-03-24 12:19:26 -04003568 def test_encode_trailing_space_at_maxlinelen(self):
3569 self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5)
3570
R David Murrayec1b5b82011-03-23 14:19:05 -04003571 def test_encode_trailing_space_beyond_maxlinelen(self):
R David Murrayb938c8c2011-03-24 12:19:26 -04003572 self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4)
3573
3574 def test_encode_whitespace_lines(self):
3575 self._test_encode(' \n' * 5, '=20\n' * 5)
R David Murrayec1b5b82011-03-23 14:19:05 -04003576
3577 def test_encode_quoted_equals(self):
3578 self._test_encode('a = b', 'a =3D b')
3579
3580 def test_encode_one_long_string(self):
3581 self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25)
3582
3583 def test_encode_one_long_line(self):
3584 self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')
3585
3586 def test_encode_one_very_long_line(self):
3587 self._test_encode('x' * 200 + '\n',
3588 2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n')
3589
3590 def test_encode_one_long_line(self):
3591 self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')
3592
3593 def test_encode_shortest_maxlinelen(self):
3594 self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003595
R David Murrayb938c8c2011-03-24 12:19:26 -04003596 def test_encode_maxlinelen_too_small(self):
3597 self.assertRaises(ValueError, self._test_encode, '', '', maxlinelen=3)
3598
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003599 def test_encode(self):
3600 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00003601 eq(quoprimime.body_encode(''), '')
3602 eq(quoprimime.body_encode('hello'), 'hello')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003603 # Test the binary flag
Guido van Rossum9604e662007-08-30 03:46:43 +00003604 eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003605 # Test the maxlinelen arg
Guido van Rossum9604e662007-08-30 03:46:43 +00003606 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003607xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
3608 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
3609x xxxx xxxx xxxx xxxx=20""")
3610 # Test the eol argument
Guido van Rossum9604e662007-08-30 03:46:43 +00003611 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
3612 """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003613xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
3614 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
3615x xxxx xxxx xxxx xxxx=20""")
Guido van Rossum9604e662007-08-30 03:46:43 +00003616 eq(quoprimime.body_encode("""\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003617one line
3618
3619two line"""), """\
3620one line
3621
3622two line""")
3623
3624
Ezio Melottib3aedd42010-11-20 19:04:17 +00003625
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003626# Test the Charset class
class TestCharset(unittest.TestCase):
    def tearDown(self):
        # Remove the 'fake' charset that test_body_encode registers, if it
        # is there.
        from email import charset as CharsetModule
        CharsetModule.CHARSETS.pop('fake', None)

    def test_codec_encodeable(self):
        eq = self.assertEqual
        # Make sure us-ascii = no Unicode conversion
        ascii_charset = Charset('us-ascii')
        eq(ascii_charset.header_encode('Hello World!'), 'Hello World!')
        # Test 8-bit idempotency with us-ascii
        s = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
        self.assertRaises(UnicodeError, ascii_charset.header_encode, s)
        utf8_charset = Charset('utf-8')
        eq(utf8_charset.header_encode(s),
           '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')

    def test_body_encode(self):
        eq = self.assertEqual
        # Try a charset with QP body encoding
        latin1 = Charset('iso-8859-1')
        eq('hello w=F6rld', latin1.body_encode('hello w\xf6rld'))
        # Try a charset with Base64 body encoding
        utf8 = Charset('utf-8')
        eq('aGVsbG8gd29ybGQ=\n', utf8.body_encode(b'hello world'))
        # Try a charset with None body encoding
        ascii_charset = Charset('us-ascii')
        eq('hello world', ascii_charset.body_encode('hello world'))
        # Try the convert argument, where input codec != output codec
        c = Charset('euc-jp')
        # With apologies to Tokio Kikuchi ;)
        # XXX FIXME
##         try:
##             eq('\x1b$B5FCO;~IW\x1b(B',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
##             eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
##         except LookupError:
##             # We probably don't have the Japanese codecs installed
##             pass
        # Testing SF bug #625509, which we have to fake, since there are no
        # built-in encodings where the header encoding is QP but the body
        # encoding is not.
        from email import charset as CharsetModule
        CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8')
        fake = Charset('fake')
        eq('hello world', fake.body_encode('hello world'))

    def test_unicode_charset_name(self):
        charset = Charset('us-ascii')
        self.assertEqual(str(charset), 'us-ascii')
        # A non-ASCII charset name is rejected
        self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
3681
3682
Ezio Melottib3aedd42010-11-20 19:04:17 +00003683
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003684# Test multilingual MIME headers.
3685class TestHeader(TestEmailBase):
3686 def test_simple(self):
3687 eq = self.ndiffAssertEqual
3688 h = Header('Hello World!')
3689 eq(h.encode(), 'Hello World!')
3690 h.append(' Goodbye World!')
3691 eq(h.encode(), 'Hello World! Goodbye World!')
3692
3693 def test_simple_surprise(self):
3694 eq = self.ndiffAssertEqual
3695 h = Header('Hello World!')
3696 eq(h.encode(), 'Hello World!')
3697 h.append('Goodbye World!')
3698 eq(h.encode(), 'Hello World! Goodbye World!')
3699
3700 def test_header_needs_no_decoding(self):
3701 h = 'no decoding needed'
3702 self.assertEqual(decode_header(h), [(h, None)])
3703
3704 def test_long(self):
3705 h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
3706 maxlinelen=76)
3707 for l in h.encode(splitchars=' ').split('\n '):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003708 self.assertTrue(len(l) <= 76)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003709
    def test_multilingual(self):
        # Build one header from three chunks in three different charsets,
        # then check its encoded form, the result of decoding that form,
        # its str() value, and a make_header() round trip.
        eq = self.ndiffAssertEqual
        g = Charset("iso-8859-1")
        cz = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        # The first two chunks are given as bytes, the third as str.
        g_head = (b'Die Mieter treten hier ein werden mit einem '
                  b'Foerderband komfortabel den Korridor entlang, '
                  b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
                  b'gegen die rotierenden Klingen bef\xf6rdert. ')
        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        h = Header(g_head, g)
        h.append(cz_head, cz)
        h.append(utf8_head, utf8)
        enc = h.encode(maxlinelen=76)
        # Expected: the two iso-8859 chunks as "q" encoded words and the
        # utf-8 chunk as "b" encoded words, folded over several lines.
        eq(enc, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
        # Decoding the encoded text must yield one (bytes, charset) pair
        # per original chunk.
        decoded = decode_header(enc)
        eq(len(decoded), 3)
        eq(decoded[0], (g_head, 'iso-8859-1'))
        eq(decoded[1], (cz_head, 'iso-8859-2'))
        eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
        # str(h) is compared with the whole text, written here as utf-8
        # bytes and decoded.
        ustr = str(h)
        eq(ustr,
           (b'Die Mieter treten hier ein werden mit einem Foerderband '
            b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
            b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
            b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
            b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
            b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
            b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
            b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
            b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
            b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
            b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
            b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
            b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
            b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
            b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
            b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
            ).decode('utf-8'))
        # Test make_header()
        newh = make_header(decode_header(enc))
        eq(newh, h)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003772
3773 def test_empty_header_encode(self):
3774 h = Header()
3775 self.assertEqual(h.encode(), '')
Barry Warsaw8b3d6592007-08-30 02:10:49 +00003776
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003777 def test_header_ctor_default_args(self):
3778 eq = self.ndiffAssertEqual
3779 h = Header()
3780 eq(h, '')
3781 h.append('foo', Charset('iso-8859-1'))
Guido van Rossum9604e662007-08-30 03:46:43 +00003782 eq(h, 'foo')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003783
3784 def test_explicit_maxlinelen(self):
3785 eq = self.ndiffAssertEqual
3786 hstr = ('A very long line that must get split to something other '
3787 'than at the 76th character boundary to test the non-default '
3788 'behavior')
3789 h = Header(hstr)
3790 eq(h.encode(), '''\
3791A very long line that must get split to something other than at the 76th
3792 character boundary to test the non-default behavior''')
3793 eq(str(h), hstr)
3794 h = Header(hstr, header_name='Subject')
3795 eq(h.encode(), '''\
3796A very long line that must get split to something other than at the
3797 76th character boundary to test the non-default behavior''')
3798 eq(str(h), hstr)
3799 h = Header(hstr, maxlinelen=1024, header_name='Subject')
3800 eq(h.encode(), hstr)
3801 eq(str(h), hstr)
3802
    def test_quopri_splittable(self):
        # Encode the same text as iso-8859-1 "q" encoded words with two
        # different maxlinelen values, and check that decoding each result
        # gives the original text back.
        eq = self.ndiffAssertEqual
        h = Header(charset='iso-8859-1', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        # With maxlinelen=20 each encoded word carries at most three
        # characters of the text.
        eq(s, """\
=?iso-8859-1?q?xxx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_?=""")
        eq(x, str(make_header(decode_header(s))))
        # Same text again (the literal equals x), now with maxlinelen=40.
        h = Header(charset='iso-8859-1', maxlinelen=40)
        h.append('xxxx ' * 20)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
 =?iso-8859-1?q?_xxxx_xxxx_?=""")
        eq(x, str(make_header(decode_header(s))))
3871
3872 def test_base64_splittable(self):
3873 eq = self.ndiffAssertEqual
3874 h = Header(charset='koi8-r', maxlinelen=20)
3875 x = 'xxxx ' * 20
3876 h.append(x)
3877 s = h.encode()
3878 eq(s, """\
3879=?koi8-r?b?eHh4?=
3880 =?koi8-r?b?eCB4?=
3881 =?koi8-r?b?eHh4?=
3882 =?koi8-r?b?IHh4?=
3883 =?koi8-r?b?eHgg?=
3884 =?koi8-r?b?eHh4?=
3885 =?koi8-r?b?eCB4?=
3886 =?koi8-r?b?eHh4?=
3887 =?koi8-r?b?IHh4?=
3888 =?koi8-r?b?eHgg?=
3889 =?koi8-r?b?eHh4?=
3890 =?koi8-r?b?eCB4?=
3891 =?koi8-r?b?eHh4?=
3892 =?koi8-r?b?IHh4?=
3893 =?koi8-r?b?eHgg?=
3894 =?koi8-r?b?eHh4?=
3895 =?koi8-r?b?eCB4?=
3896 =?koi8-r?b?eHh4?=
3897 =?koi8-r?b?IHh4?=
3898 =?koi8-r?b?eHgg?=
3899 =?koi8-r?b?eHh4?=
3900 =?koi8-r?b?eCB4?=
3901 =?koi8-r?b?eHh4?=
3902 =?koi8-r?b?IHh4?=
3903 =?koi8-r?b?eHgg?=
3904 =?koi8-r?b?eHh4?=
3905 =?koi8-r?b?eCB4?=
3906 =?koi8-r?b?eHh4?=
3907 =?koi8-r?b?IHh4?=
3908 =?koi8-r?b?eHgg?=
3909 =?koi8-r?b?eHh4?=
3910 =?koi8-r?b?eCB4?=
3911 =?koi8-r?b?eHh4?=
3912 =?koi8-r?b?IA==?=""")
3913 eq(x, str(make_header(decode_header(s))))
3914 h = Header(charset='koi8-r', maxlinelen=40)
3915 h.append(x)
3916 s = h.encode()
3917 eq(s, """\
3918=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
3919 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
3920 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
3921 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
3922 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
3923 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
3924 eq(x, str(make_header(decode_header(s))))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003925
3926 def test_us_ascii_header(self):
3927 eq = self.assertEqual
3928 s = 'hello'
3929 x = decode_header(s)
3930 eq(x, [('hello', None)])
3931 h = make_header(x)
3932 eq(s, h.encode())
3933
3934 def test_string_charset(self):
3935 eq = self.assertEqual
3936 h = Header()
3937 h.append('hello', 'iso-8859-1')
Guido van Rossum9604e662007-08-30 03:46:43 +00003938 eq(h, 'hello')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003939
3940## def test_unicode_error(self):
3941## raises = self.assertRaises
3942## raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
3943## raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
3944## h = Header()
3945## raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
3946## raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
3947## raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')
3948
3949 def test_utf8_shortest(self):
3950 eq = self.assertEqual
3951 h = Header('p\xf6stal', 'utf-8')
3952 eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
3953 h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
3954 eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')
3955
3956 def test_bad_8bit_header(self):
3957 raises = self.assertRaises
3958 eq = self.assertEqual
3959 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
3960 raises(UnicodeError, Header, x)
3961 h = Header()
3962 raises(UnicodeError, h.append, x)
3963 e = x.decode('utf-8', 'replace')
3964 eq(str(Header(x, errors='replace')), e)
3965 h.append(x, errors='replace')
3966 eq(str(h), e)
3967
R David Murray041015c2011-03-25 15:10:55 -04003968 def test_escaped_8bit_header(self):
3969 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
3970 x = x.decode('ascii', 'surrogateescape')
3971 h = Header(x, charset=email.charset.UNKNOWN8BIT)
3972 self.assertEqual(str(h),
3973 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
3974 self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])
3975
3976 def test_modify_returned_list_does_not_change_header(self):
3977 h = Header('test')
3978 chunks = email.header.decode_header(h)
3979 chunks.append(('ascii', 'test2'))
3980 self.assertEqual(str(h), 'test')
3981
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003982 def test_encoded_adjacent_nonencoded(self):
3983 eq = self.assertEqual
3984 h = Header()
3985 h.append('hello', 'iso-8859-1')
3986 h.append('world')
3987 s = h.encode()
3988 eq(s, '=?iso-8859-1?q?hello?= world')
3989 h = make_header(decode_header(s))
3990 eq(h.encode(), s)
3991
3992 def test_whitespace_eater(self):
3993 eq = self.assertEqual
3994 s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
3995 parts = decode_header(s)
3996 eq(parts, [(b'Subject:', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b'zz.', None)])
3997 hdr = make_header(parts)
3998 eq(hdr.encode(),
3999 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')
4000
4001 def test_broken_base64_header(self):
4002 raises = self.assertRaises
R. David Murrayc4e69cc2010-08-03 22:14:10 +00004003 s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004004 raises(errors.HeaderParseError, decode_header, s)
4005
R. David Murray477efb32011-01-05 01:39:32 +00004006 def test_shift_jis_charset(self):
4007 h = Header('文', charset='shift_jis')
4008 self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')
4009
R David Murrayde912762011-03-16 18:26:23 -04004010 def test_flatten_header_with_no_value(self):
4011 # Issue 11401 (regression from email 4.x) Note that the space after
4012 # the header doesn't reflect the input, but this is also the way
4013 # email 4.x behaved. At some point it would be nice to fix that.
4014 msg = email.message_from_string("EmptyHeader:")
4015 self.assertEqual(str(msg), "EmptyHeader: \n\n")
4016
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004017
Ezio Melottib3aedd42010-11-20 19:04:17 +00004018
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004019# Test RFC 2231 header parameters (en/de)coding
class TestRFC2231(TestEmailBase):
    """Tests for RFC 2231 parameter handling on parsed and built messages."""

    def test_get_param(self):
        """get_param() returns a (charset, language, value) triple."""
        parsed = self._msgobj('msg_29.txt')
        self.assertEqual(
            parsed.get_param('title'),
            ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        # With unquote=False the surrounding double quotes are kept.
        self.assertEqual(
            parsed.get_param('title', unquote=False),
            ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))

    def test_set_param(self):
        """set_param() stores charset/language and flattens in RFC 2231 form."""
        check = self.ndiffAssertEqual
        title = 'This is even more ***fun*** isn\'t it!'

        fresh = Message()
        fresh.set_param('title', title, charset='us-ascii')
        check(fresh.get_param('title'), ('us-ascii', '', title))
        fresh.set_param('title', title, charset='us-ascii', language='en')
        check(fresh.get_param('title'), ('us-ascii', 'en', title))

        parsed = self._msgobj('msg_01.txt')
        parsed.set_param('title', title, charset='us-ascii', language='en')
        check(parsed.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    def test_del_param(self):
        """del_param() removes one parameter and leaves the others in place."""
        check = self.ndiffAssertEqual
        parsed = self._msgobj('msg_01.txt')
        parsed.set_param('foo', 'bar', charset='us-ascii', language='en')
        parsed.set_param('title', 'This is even more ***fun*** isn\'t it!',
                         charset='us-ascii', language='en')
        parsed.del_param('foo', header='Content-Type')
        check(parsed.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    def test_rfc2231_get_content_charset(self):
        """get_content_charset() yields 'us-ascii' for msg_32.txt."""
        parsed = self._msgobj('msg_32.txt')
        self.assertEqual(parsed.get_content_charset(), 'us-ascii')

    def test_rfc2231_parse_rfc_quoting(self):
        """Unquoted encoded segments parse and flatten back unchanged."""
        raw = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*=''This%20is%20even%20more%20;
            \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
            \tfilename*2="is it not.pdf"

            ''')
        parsed = email.message_from_string(raw)
        self.assertEqual(parsed.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        self.assertEqual(raw, parsed.as_string())

    def test_rfc2231_parse_extra_quoting(self):
        """Quoted encoded segments parse and flatten back unchanged."""
        raw = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*="''This%20is%20even%20more%20";
            \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
            \tfilename*2="is it not.pdf"

            ''')
        parsed = email.message_from_string(raw)
        self.assertEqual(parsed.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        self.assertEqual(raw, parsed.as_string())

    def test_rfc2231_no_language_or_charset(self):
        """Plain continuations (no charset/language) join into one string."""
        raw = '''\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm

'''
        parsed = email.message_from_string(raw)
        value = parsed.get_param('NAME')
        self.assertNotIsInstance(value, tuple)
        self.assertEqual(
            value,
            'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')

    def test_rfc2231_no_language_or_charset_in_filename(self):
        """Encoded filename segments with empty charset/language decode."""
        raw = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        parsed = email.message_from_string(raw)
        self.assertEqual(parsed.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
        """Same input and expectation as the non-'_encoded' filename test."""
        raw = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        parsed = email.message_from_string(raw)
        self.assertEqual(parsed.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_partly_encoded(self):
        """Only segments marked with a trailing '*' are percent-decoded."""
        raw = '''\
Content-Disposition: inline;
\tfilename*0="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        parsed = email.message_from_string(raw)
        self.assertEqual(
            parsed.get_filename(),
            'This%20is%20even%20more%20***fun*** is it not.pdf')

    def test_rfc2231_partly_nonencoded(self):
        """Segments without a trailing '*' keep their percent escapes."""
        raw = '''\
Content-Disposition: inline;
\tfilename*0="This%20is%20even%20more%20";
\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        parsed = email.message_from_string(raw)
        self.assertEqual(
            parsed.get_filename(),
            'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_boundary(self):
        """get_boundary() decodes a segmented, encoded boundary parameter."""
        raw = '''\
Content-Type: multipart/alternative;
\tboundary*0*="''This%20is%20even%20more%20";
\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tboundary*2="is it not.pdf"

'''
        parsed = email.message_from_string(raw)
        self.assertEqual(parsed.get_boundary(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_charset(self):
        """get_content_charset() decodes and lower-cases a segmented value."""
        # This is a nonsensical charset value, but tests the code anyway
        raw = '''\
Content-Type: text/plain;
\tcharset*0*="This%20is%20even%20more%20";
\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tcharset*2="is it not.pdf"

'''
        parsed = email.message_from_string(raw)
        self.assertEqual(parsed.get_content_charset(),
                         'this is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_filename(self):
        """An unknown charset name on the filename still yields the text."""
        raw = '''\
Content-Disposition: inline;
\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        parsed = email.message_from_string(raw)
        self.assertEqual(parsed.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_charset(self):
        """A charset parameter declared with a bogus encoding gives None."""
        raw = """\
Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D

"""
        parsed = email.message_from_string(raw)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertEqual(parsed.get_content_charset(), None)

    def test_rfc2231_bad_character_in_charset(self):
        """A charset parameter containing non-ASCII bytes gives None."""
        raw = """\
Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D

"""
        parsed = email.message_from_string(raw)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertEqual(parsed.get_content_charset(), None)

    def test_rfc2231_bad_character_in_filename(self):
        """An undecodable byte in the filename becomes U+FFFD."""
        raw = '''\
Content-Disposition: inline;
\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2*="is it not.pdf%E2"

'''
        parsed = email.message_from_string(raw)
        self.assertEqual(parsed.get_filename(),
                         'This is even more ***fun*** is it not.pdf\ufffd')

    def test_rfc2231_unknown_encoding(self):
        """A filename tagged with an unknown charset is still returned."""
        raw = """\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt

"""
        parsed = email.message_from_string(raw)
        self.assertEqual(parsed.get_filename(), 'myfile.txt')

    def test_rfc2231_single_tick_in_filename_extended(self):
        """One apostrophe in an extended value is not a charset delimiter."""
        raw = """\
Content-Type: application/x-foo;
\tname*0*=\"Frank's\"; name*1*=\" Document\"

"""
        parsed = email.message_from_string(raw)
        charset, language, value = parsed.get_param('name')
        self.assertEqual((charset, language, value),
                         (None, None, "Frank's Document"))

    def test_rfc2231_single_tick_in_filename(self):
        """One apostrophe in plain continuations yields a plain string."""
        raw = """\
Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"

"""
        parsed = email.message_from_string(raw)
        value = parsed.get_param('name')
        self.assertNotIsInstance(value, tuple)
        self.assertEqual(value, "Frank's Document")

    def test_rfc2231_tick_attack_extended(self):
        """Only the first two apostrophes delimit charset and language."""
        raw = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"

"""
        parsed = email.message_from_string(raw)
        charset, language, value = parsed.get_param('name')
        self.assertEqual((charset, language, value),
                         ('us-ascii', 'en-us', "Frank's Document"))

    def test_rfc2231_tick_attack(self):
        """Apostrophes in non-extended segments are kept literally."""
        raw = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"

"""
        parsed = email.message_from_string(raw)
        value = parsed.get_param('name')
        self.assertNotIsInstance(value, tuple)
        self.assertEqual(value, "us-ascii'en-us'Frank's Document")

    def test_rfc2231_no_extended_values(self):
        """An ordinary quoted parameter is returned as a plain string."""
        raw = """\
Content-Type: application/x-foo; name=\"Frank's Document\"

"""
        parsed = email.message_from_string(raw)
        self.assertEqual(parsed.get_param('name'), "Frank's Document")

    def test_rfc2231_encoded_then_unencoded_segments(self):
        """An encoded first segment followed by a plain one still joins."""
        raw = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'My\";
\tname*1=\" Document\";
\tname*2*=\" For You\"

"""
        parsed = email.message_from_string(raw)
        charset, language, value = parsed.get_param('name')
        self.assertEqual((charset, language, value),
                         ('us-ascii', 'en-us', 'My Document For You'))

    def test_rfc2231_unencoded_then_encoded_segments(self):
        """A plain first segment followed by encoded ones still joins."""
        raw = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'My\";
\tname*1*=\" Document\";
\tname*2*=\" For You\"

"""
        parsed = email.message_from_string(raw)
        charset, language, value = parsed.get_param('name')
        self.assertEqual((charset, language, value),
                         ('us-ascii', 'en-us', 'My Document For You'))
4354
4355
Ezio Melottib3aedd42010-11-20 19:04:17 +00004356
R. David Murraya8f480f2010-01-16 18:30:03 +00004357# Tests to ensure that signed parts of an email are completely preserved, as
4358# required by RFC1847 section 2.1. Note that these are incomplete, because the
4359# email package does not currently always preserve the body. See issue 1670765.
class TestSigned(TestEmailBase):
    """Check that the first MIME part of a signed message survives a round trip.

    Each test parses msg_45.txt, regenerates its text, and compares the first
    MIME part of the original with the first MIME part of the result.
    """

    # Captures the text between the first boundary line and the next line
    # carrying the same boundary.  Compiled once for all tests; the module
    # already imports re at top level, so no local import is needed.
    _first_part_re = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)

    def _msg_and_obj(self, filename):
        """Return (original text, parsed message) for the named data file."""
        with openfile(findfile(filename)) as fp:
            original = fp.read()
            msg = email.message_from_string(original)
        return original, msg

    def _first_part(self, text):
        """Return the body of the first MIME part found in *text*.

        Fails the test with a clear message when no boundary-delimited part
        exists, rather than raising AttributeError on a None match.
        """
        match = self._first_part_re.search(text)
        self.assertIsNotNone(
            match, 'no boundary-delimited MIME part found in message text')
        return match.group(2)

    def _signed_parts_eq(self, original, result):
        """Assert that *original* and *result* have the same first MIME part."""
        inpart = self._first_part(original)
        outpart = self._first_part(result)
        self.assertEqual(outpart, inpart)

    def test_long_headers_as_string(self):
        """as_string() with default settings preserves the first part."""
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        """as_string(maxheaderlen=60) preserves the first part."""
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        """Generator.flatten() preserves the first part."""
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)
4392
4393
Ezio Melottib3aedd42010-11-20 19:04:17 +00004394
def _testclasses():
    """Return every attribute of this module whose name starts with 'Test'.

    Names are visited in the order produced by dir(), i.e. sorted.
    """
    this_module = sys.modules[__name__]
    found = []
    for attr_name in dir(this_module):
        if attr_name.startswith('Test'):
            found.append(getattr(this_module, attr_name))
    return found
4398
4399
def suite():
    """Build one TestSuite holding the tests of every Test* class here.

    Uses TestLoader.loadTestsFromTestCase rather than unittest.makeSuite,
    which is deprecated and absent from newer Python releases.

    Returns:
        A unittest.TestSuite with one sub-suite per class returned by
        _testclasses().
    """
    load = unittest.defaultTestLoader.loadTestsFromTestCase
    # Local is not named 'suite' so it does not shadow this function.
    combined = unittest.TestSuite()
    for testclass in _testclasses():
        combined.addTest(load(testclass))
    return combined
4405
4406
def test_main():
    """Run each Test* class of this module through run_unittest in turn."""
    classes = _testclasses()
    for cls in classes:
        run_unittest(cls)
4410
4411
Ezio Melottib3aedd42010-11-20 19:04:17 +00004412
if __name__ == '__main__':
    # When run as a script, hand unittest the module-level suite() function
    # as the default test to execute.
    unittest.main(defaultTest='suite')