blob: 27356664ce5362f48dd5c4e98832faec1b6695a3 [file] [log] [blame]
# Copyright (C) 2001-2010 Python Software Foundation
# Contact: email-sig@python.org
# email package unit tests
4
5import os
R. David Murray719a4492010-11-21 16:53:48 +00006import re
Guido van Rossum8b3febe2007-08-30 01:15:14 +00007import sys
8import time
9import base64
10import difflib
11import unittest
12import warnings
R. David Murray96fd54e2010-10-08 15:55:28 +000013import textwrap
Guido van Rossum8b3febe2007-08-30 01:15:14 +000014
R. David Murray96fd54e2010-10-08 15:55:28 +000015from io import StringIO, BytesIO
Guido van Rossum8b3febe2007-08-30 01:15:14 +000016from itertools import chain
17
18import email
19
20from email.charset import Charset
21from email.header import Header, decode_header, make_header
22from email.parser import Parser, HeaderParser
23from email.generator import Generator, DecodedGenerator
24from email.message import Message
25from email.mime.application import MIMEApplication
26from email.mime.audio import MIMEAudio
27from email.mime.text import MIMEText
28from email.mime.image import MIMEImage
29from email.mime.base import MIMEBase
30from email.mime.message import MIMEMessage
31from email.mime.multipart import MIMEMultipart
32from email import utils
33from email import errors
34from email import encoders
35from email import iterators
36from email import base64mime
37from email import quoprimime
38
R David Murray28346b82011-03-31 11:40:20 -040039from test.support import run_unittest, unlink
R David Murray9aaba782011-03-21 17:17:06 -040040from test.test_email import __file__ as landmark
Guido van Rossum8b3febe2007-08-30 01:15:14 +000041
42
# Shared string constants used by the tests when joining text.
NL = '\n'
EMPTYSTRING = ''
SPACE = ' '
46
47
Ezio Melottib3aedd42010-11-20 19:04:17 +000048
def openfile(filename, *args, **kws):
    """Open a test fixture from the 'data' directory beside ``landmark``.

    ``landmark`` is the file path imported at module level; the fixture is
    looked up in a ``data`` subdirectory of the directory containing it.
    Any extra positional and keyword arguments are passed straight through
    to the built-in open(), so callers choose the mode (e.g. 'rb').

    Returns the open file object; the caller is responsible for closing it
    (typically by using it in a ``with`` statement).
    """
    path = os.path.join(os.path.dirname(landmark), 'data', filename)
    return open(path, *args, **kws)
52
53
Ezio Melottib3aedd42010-11-20 19:04:17 +000054
# Base test class
class TestEmailBase(unittest.TestCase):
    def ndiffAssertEqual(self, first, second):
        """Like assertEqual except use ndiff for readable output."""
        if first != second:
            sfirst = str(first)
            ssecond = str(second)
            # Compare the repr() of each line so that differences in
            # whitespace and escape characters are visible in the diff.
            rfirst = [repr(line) for line in sfirst.splitlines()]
            rsecond = [repr(line) for line in ssecond.splitlines()]
            diff = difflib.ndiff(rfirst, rsecond)
            raise self.failureException(NL + NL.join(diff))

    def _msgobj(self, filename):
        """Parse the named fixture file and return the resulting message."""
        with openfile(filename) as fp:
            return email.message_from_file(fp)
70
71
Ezio Melottib3aedd42010-11-20 19:04:17 +000072
# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    # Comments rather than docstrings are used on the test methods so that
    # unittest's verbose output keeps reporting the method names.

    def test_get_all(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        eq(msg.get_charset(), None)
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        eq(msg.get_charset(), None)
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_get_charsets(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_make_boundary(self):
        msg = MIMEMultipart('form-data')
        # Note that when the boundary gets created is an implementation
        # detail and might change.
        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
        # Trigger creation of boundary
        msg.as_string()
        self.assertEqual(msg.items()[0][1][:33],
                         'multipart/form-data; boundary="==')
        # XXX: there ought to be tests of the uniqueness of the boundary, too.

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        with openfile('msg_46.txt') as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, True, 0)
        gen.flatten(msg, False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        eq(msg.get_payload(decode=True), None)
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_decoded_generator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        self.assertIn('from', msg)
        self.assertIn('From', msg)
        self.assertIn('FROM', msg)
        self.assertIn('to', msg)
        self.assertIn('To', msg)
        self.assertIn('TO', msg)

    def test_as_string(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        eq(text, str(msg))
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        self.assertTrue(lines[0].startswith('From '))
        eq(text, NL.join(lines[1:]))

    def test_bad_param(self):
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_filename(), None)

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_boundary(), None)

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        eq(msg.get_params(), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    def test_get_param_liberal(self):
        msg = Message()
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        eq(msg.get_param('quuz', header='x-header'), None)
        eq(msg.get_param('quuz'), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    def test_get_param_with_semis_in_quotes(self):
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    def test_get_param_with_quotes(self):
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        msg = email.message_from_string('Header: exists')
        self.assertIn('header', msg)
        self.assertIn('Header', msg)
        self.assertIn('HEADER', msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    def test_broken_base64_payload(self):
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    # Issue 1078919
    def test_ascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    def test_noascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer.ppt")
        self.assertEqual(
            'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
            msg['Content-Disposition'])

    def test_nonascii_add_header_via_triple(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fußballer.ppt'))
        self.assertEqual(
            'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
            msg['Content-Disposition'])

    def test_ascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="windows [filename].ppt")
        self.assertEqual(
            'attachment; filename="windows [filename].ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer [filename].ppt")
        self.assertEqual(
            "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
            msg['Content-Disposition'])

    # Issue 5871: reject an attempt to embed a header inside a header value
    # (header injection attack).
    def test_embeded_header_via_Header_rejected(self):
        msg = Message()
        msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_embeded_header_via_string_rejected(self):
        msg = Message()
        msg['Dummy'] = 'dummy\nX-Injected-Header: test'
        self.assertRaises(errors.HeaderParseError, msg.as_string)
575
# Test the email.encoders module
class TestEncoders(unittest.TestCase):
    # Comments rather than docstrings are used on the test methods so that
    # unittest's verbose output keeps reporting the method names.

    def test_EncodersEncode_base64(self):
        with openfile('PyBanner048.gif', 'rb') as fp:
            bindata = fp.read()
        mimed = email.mime.image.MIMEImage(bindata)
        base64ed = mimed.get_payload()
        # the transfer-encoded body lines should all be <=76 characters
        lines = base64ed.split('\n')
        self.assertLessEqual(max(len(x) for x in lines), 76)

    def test_encode_empty_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg['content-transfer-encoding'], '7bit')

    def test_default_cte(self):
        eq = self.assertEqual
        # 7bit data and the default us-ascii _charset
        msg = MIMEText('hello world')
        eq(msg['content-transfer-encoding'], '7bit')
        # Similar, but with 8bit data
        msg = MIMEText('hello \xf8 world')
        eq(msg['content-transfer-encoding'], '8bit')
        # And now with a different charset
        msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')

    def test_encode7or8bit(self):
        # Make sure a charset whose input character set is 8bit but
        # whose output character set is 7bit gets a transfer-encoding
        # of 7bit.
        eq = self.assertEqual
        msg = MIMEText('文', _charset='euc-jp')
        eq(msg['content-transfer-encoding'], '7bit')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000613
Ezio Melottib3aedd42010-11-20 19:04:17 +0000614
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000615# Test long header wrapping
616class TestLongHeaders(TestEmailBase):
617 def test_split_long_continuation(self):
618 eq = self.ndiffAssertEqual
619 msg = email.message_from_string("""\
620Subject: bug demonstration
621\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
622\tmore text
623
624test
625""")
626 sfp = StringIO()
627 g = Generator(sfp)
628 g.flatten(msg)
629 eq(sfp.getvalue(), """\
630Subject: bug demonstration
631\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
632\tmore text
633
634test
635""")
636
637 def test_another_long_almost_unsplittable_header(self):
638 eq = self.ndiffAssertEqual
639 hstr = """\
640bug demonstration
641\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
642\tmore text"""
643 h = Header(hstr, continuation_ws='\t')
644 eq(h.encode(), """\
645bug demonstration
646\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
647\tmore text""")
648 h = Header(hstr.replace('\t', ' '))
649 eq(h.encode(), """\
650bug demonstration
651 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
652 more text""")
653
654 def test_long_nonstring(self):
655 eq = self.ndiffAssertEqual
656 g = Charset("iso-8859-1")
657 cz = Charset("iso-8859-2")
658 utf8 = Charset("utf-8")
659 g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
660 b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
661 b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
662 b'bef\xf6rdert. ')
663 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
664 b'd\xf9vtipu.. ')
665 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
666 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
667 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
668 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
669 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
670 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
671 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
672 '\u3044\u307e\u3059\u3002')
673 h = Header(g_head, g, header_name='Subject')
674 h.append(cz_head, cz)
675 h.append(utf8_head, utf8)
676 msg = Message()
677 msg['Subject'] = h
678 sfp = StringIO()
679 g = Generator(sfp)
680 g.flatten(msg)
681 eq(sfp.getvalue(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000682Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
683 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
684 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
685 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
686 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
687 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
688 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
689 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
690 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
691 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
692 =?utf-8?b?44CC?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000693
694""")
Guido van Rossum9604e662007-08-30 03:46:43 +0000695 eq(h.encode(maxlinelen=76), """\
696=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
697 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
698 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
699 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
700 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
701 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
702 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
703 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
704 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
705 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
706 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000707
708 def test_long_header_encode(self):
709 eq = self.ndiffAssertEqual
710 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
711 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
712 header_name='X-Foobar-Spoink-Defrobnit')
713 eq(h.encode(), '''\
714wasnipoop; giraffes="very-long-necked-animals";
715 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
716
717 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
718 eq = self.ndiffAssertEqual
719 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
720 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
721 header_name='X-Foobar-Spoink-Defrobnit',
722 continuation_ws='\t')
723 eq(h.encode(), '''\
724wasnipoop; giraffes="very-long-necked-animals";
725 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
726
727 def test_long_header_encode_with_tab_continuation(self):
728 eq = self.ndiffAssertEqual
729 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
730 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
731 header_name='X-Foobar-Spoink-Defrobnit',
732 continuation_ws='\t')
733 eq(h.encode(), '''\
734wasnipoop; giraffes="very-long-necked-animals";
735\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
736
R David Murray3a6152f2011-03-14 21:13:03 -0400737 def test_header_encode_with_different_output_charset(self):
738 h = Header('文', 'euc-jp')
739 self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=")
740
741 def test_long_header_encode_with_different_output_charset(self):
742 h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4'
743 b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4'
744 b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4'
745 b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp')
746 res = """\
747=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?=
748 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?="""
749 self.assertEqual(h.encode(), res)
750
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000751 def test_header_splitter(self):
752 eq = self.ndiffAssertEqual
753 msg = MIMEText('')
754 # It'd be great if we could use add_header() here, but that doesn't
755 # guarantee an order of the parameters.
756 msg['X-Foobar-Spoink-Defrobnit'] = (
757 'wasnipoop; giraffes="very-long-necked-animals"; '
758 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
759 sfp = StringIO()
760 g = Generator(sfp)
761 g.flatten(msg)
762 eq(sfp.getvalue(), '''\
763Content-Type: text/plain; charset="us-ascii"
764MIME-Version: 1.0
765Content-Transfer-Encoding: 7bit
766X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
767 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
768
769''')
770
771 def test_no_semis_header_splitter(self):
772 eq = self.ndiffAssertEqual
773 msg = Message()
774 msg['From'] = 'test@dom.ain'
775 msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
776 msg.set_payload('Test')
777 sfp = StringIO()
778 g = Generator(sfp)
779 g.flatten(msg)
780 eq(sfp.getvalue(), """\
781From: test@dom.ain
782References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
783 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>
784
785Test""")
786
787 def test_no_split_long_header(self):
788 eq = self.ndiffAssertEqual
789 hstr = 'References: ' + 'x' * 80
Guido van Rossum9604e662007-08-30 03:46:43 +0000790 h = Header(hstr)
791 # These come on two lines because Headers are really field value
792 # classes and don't really know about their field names.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000793 eq(h.encode(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000794References:
795 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
796 h = Header('x' * 80)
797 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000798
799 def test_splitting_multiple_long_lines(self):
800 eq = self.ndiffAssertEqual
801 hstr = """\
802from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
803\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
804\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
805"""
806 h = Header(hstr, continuation_ws='\t')
807 eq(h.encode(), """\
808from babylon.socal-raves.org (localhost [127.0.0.1]);
809 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
810 for <mailman-admin@babylon.socal-raves.org>;
811 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
812\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
813 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
814 for <mailman-admin@babylon.socal-raves.org>;
815 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
816\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
817 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
818 for <mailman-admin@babylon.socal-raves.org>;
819 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")
820
821 def test_splitting_first_line_only_is_long(self):
822 eq = self.ndiffAssertEqual
823 hstr = """\
824from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
825\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
826\tid 17k4h5-00034i-00
827\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
828 h = Header(hstr, maxlinelen=78, header_name='Received',
829 continuation_ws='\t')
830 eq(h.encode(), """\
831from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
832 helo=cthulhu.gerg.ca)
833\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
834\tid 17k4h5-00034i-00
835\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")
836
837 def test_long_8bit_header(self):
838 eq = self.ndiffAssertEqual
839 msg = Message()
840 h = Header('Britische Regierung gibt', 'iso-8859-1',
841 header_name='Subject')
842 h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
Guido van Rossum9604e662007-08-30 03:46:43 +0000843 eq(h.encode(maxlinelen=76), """\
844=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
845 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000846 msg['Subject'] = h
Guido van Rossum9604e662007-08-30 03:46:43 +0000847 eq(msg.as_string(maxheaderlen=76), """\
848Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
849 =?iso-8859-1?q?hore-Windkraftprojekte?=
850
851""")
852 eq(msg.as_string(maxheaderlen=0), """\
853Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000854
855""")
856
857 def test_long_8bit_header_no_charset(self):
858 eq = self.ndiffAssertEqual
859 msg = Message()
Barry Warsaw8c571042007-08-30 19:17:18 +0000860 header_string = ('Britische Regierung gibt gr\xfcnes Licht '
861 'f\xfcr Offshore-Windkraftprojekte '
862 '<a-very-long-address@example.com>')
863 msg['Reply-To'] = header_string
864 self.assertRaises(UnicodeEncodeError, msg.as_string)
865 msg = Message()
866 msg['Reply-To'] = Header(header_string, 'utf-8',
867 header_name='Reply-To')
868 eq(msg.as_string(maxheaderlen=78), """\
869Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
870 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000871
872""")
873
874 def test_long_to_header(self):
875 eq = self.ndiffAssertEqual
876 to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
877 '<someone@eecs.umich.edu>,'
878 '"Someone Test #B" <someone@umich.edu>, '
879 '"Someone Test #C" <someone@eecs.umich.edu>, '
880 '"Someone Test #D" <someone@eecs.umich.edu>')
881 msg = Message()
882 msg['To'] = to
883 eq(msg.as_string(maxheaderlen=78), '''\
Guido van Rossum9604e662007-08-30 03:46:43 +0000884To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
Barry Warsaw70d61ce2009-03-30 23:12:30 +0000885 "Someone Test #B" <someone@umich.edu>,
Guido van Rossum9604e662007-08-30 03:46:43 +0000886 "Someone Test #C" <someone@eecs.umich.edu>,
887 "Someone Test #D" <someone@eecs.umich.edu>
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000888
889''')
890
891 def test_long_line_after_append(self):
892 eq = self.ndiffAssertEqual
893 s = 'This is an example of string which has almost the limit of header length.'
894 h = Header(s)
895 h.append('Add another line.')
Guido van Rossum9604e662007-08-30 03:46:43 +0000896 eq(h.encode(maxlinelen=76), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000897This is an example of string which has almost the limit of header length.
898 Add another line.""")
899
900 def test_shorter_line_with_append(self):
901 eq = self.ndiffAssertEqual
902 s = 'This is a shorter line.'
903 h = Header(s)
904 h.append('Add another sentence. (Surprise?)')
905 eq(h.encode(),
906 'This is a shorter line. Add another sentence. (Surprise?)')
907
908 def test_long_field_name(self):
909 eq = self.ndiffAssertEqual
910 fn = 'X-Very-Very-Very-Long-Header-Name'
Guido van Rossum9604e662007-08-30 03:46:43 +0000911 gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
912 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
913 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
914 'bef\xf6rdert. ')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000915 h = Header(gs, 'iso-8859-1', header_name=fn)
916 # BAW: this seems broken because the first line is too long
Guido van Rossum9604e662007-08-30 03:46:43 +0000917 eq(h.encode(maxlinelen=76), """\
918=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
919 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
920 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
921 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000922
923 def test_long_received_header(self):
924 h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
925 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
926 'Wed, 05 Mar 2003 18:10:18 -0700')
927 msg = Message()
928 msg['Received-1'] = Header(h, continuation_ws='\t')
929 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +0000930 # This should be splitting on spaces not semicolons.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000931 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Barry Warsawbef9d212007-08-31 10:55:37 +0000932Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
933 Wed, 05 Mar 2003 18:10:18 -0700
934Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
935 Wed, 05 Mar 2003 18:10:18 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000936
937""")
938
939 def test_string_headerinst_eq(self):
940 h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
941 'tu-muenchen.de> (David Bremner\'s message of '
942 '"Thu, 6 Mar 2003 13:58:21 +0100")')
943 msg = Message()
944 msg['Received-1'] = Header(h, header_name='Received-1',
945 continuation_ws='\t')
946 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +0000947 # XXX This should be splitting on spaces not commas.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000948 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Barry Warsawbef9d212007-08-31 10:55:37 +0000949Received-1: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner's message of \"Thu,
950 6 Mar 2003 13:58:21 +0100\")
951Received-2: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner's message of \"Thu,
952 6 Mar 2003 13:58:21 +0100\")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000953
954""")
955
956 def test_long_unbreakable_lines_with_continuation(self):
957 eq = self.ndiffAssertEqual
958 msg = Message()
959 t = """\
960iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
961 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
962 msg['Face-1'] = t
963 msg['Face-2'] = Header(t, header_name='Face-2')
Barry Warsawbef9d212007-08-31 10:55:37 +0000964 # XXX This splitting is all wrong. It the first value line should be
965 # snug against the field name.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000966 eq(msg.as_string(maxheaderlen=78), """\
Barry Warsawc5a6a302007-08-31 11:19:21 +0000967Face-1:\x20
Barry Warsaw70d61ce2009-03-30 23:12:30 +0000968 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000969 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Barry Warsawc5a6a302007-08-31 11:19:21 +0000970Face-2:\x20
Barry Warsawbef9d212007-08-31 10:55:37 +0000971 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000972 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
973
974""")
975
976 def test_another_long_multiline_header(self):
977 eq = self.ndiffAssertEqual
978 m = ('Received: from siimage.com '
979 '([172.25.1.3]) by zima.siliconimage.com with '
Guido van Rossum9604e662007-08-30 03:46:43 +0000980 'Microsoft SMTPSVC(5.0.2195.4905); '
981 'Wed, 16 Oct 2002 07:41:11 -0700')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000982 msg = email.message_from_string(m)
983 eq(msg.as_string(maxheaderlen=78), '''\
Barry Warsawbef9d212007-08-31 10:55:37 +0000984Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with Microsoft SMTPSVC(5.0.2195.4905);
985 Wed, 16 Oct 2002 07:41:11 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000986
987''')
988
989 def test_long_lines_with_different_header(self):
990 eq = self.ndiffAssertEqual
991 h = ('List-Unsubscribe: '
992 '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
993 ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
994 '?subject=unsubscribe>')
995 msg = Message()
996 msg['List'] = h
997 msg['List'] = Header(h, header_name='List')
998 eq(msg.as_string(maxheaderlen=78), """\
999List: List-Unsubscribe: <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +00001000 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001001List: List-Unsubscribe: <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +00001002 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001003
1004""")
1005
R. David Murray6f0022d2011-01-07 21:57:25 +00001006 def test_long_rfc2047_header_with_embedded_fws(self):
1007 h = Header(textwrap.dedent("""\
1008 We're going to pretend this header is in a non-ascii character set
1009 \tto see if line wrapping with encoded words and embedded
1010 folding white space works"""),
1011 charset='utf-8',
1012 header_name='Test')
1013 self.assertEqual(h.encode()+'\n', textwrap.dedent("""\
1014 =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
1015 =?utf-8?q?cter_set?=
1016 =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
1017 =?utf-8?q?_folding_white_space_works?=""")+'\n')
1018
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001019
Ezio Melottib3aedd42010-11-20 19:04:17 +00001020
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001021# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    # The payload's first line starts with "From ", which is what the
    # generator's mangle_from_ option acts on.
    _BODY = 'From the desk of A.A.A.:\nBlah blah blah\n'

    def setUp(self):
        message = Message()
        message['From'] = 'aaa@bbb.org'
        message.set_payload(self._BODY)
        self.msg = message

    def _flatten(self, mangle):
        # Flatten the fixture with the given mangle_from_ setting and
        # return the generated text.
        out = StringIO()
        Generator(out, mangle_from_=mangle).flatten(self.msg)
        return out.getvalue()

    def test_mangled_from(self):
        # With mangling on, the body line is expected to gain a '>' prefix.
        self.assertEqual(
            self._flatten(True),
            'From: aaa@bbb.org\n'
            '\n'
            '>From the desk of A.A.A.:\n'
            'Blah blah blah\n')

    def test_dont_mangle_from(self):
        # With mangling off, the body is expected to pass through untouched.
        self.assertEqual(
            self._flatten(False),
            'From: aaa@bbb.org\n'
            '\n'
            'From the desk of A.A.A.:\n'
            'Blah blah blah\n')
1052
1053
Ezio Melottib3aedd42010-11-20 19:04:17 +00001054
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001055# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    def setUp(self):
        # Keep the raw bytes so test_encoding can compare them with the
        # decoded payload.
        with openfile('audiotest.au', 'rb') as stream:
            raw = stream.read()
        self._audiodata = raw
        self._au = MIMEAudio(raw)

    def test_guess_minor_type(self):
        self.assertEqual(self._au.get_content_type(), 'audio/basic')

    def test_encoding(self):
        # The payload is expected to be base64 text that decodes back to
        # the original bytes.
        encoded = self._au.get_payload().encode('ascii')
        self.assertEqual(base64.decodebytes(encoded), self._audiodata)

    def test_checkSetMinor(self):
        audio = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(audio.get_content_type(), 'audio/fish')

    def test_add_header(self):
        audio = self._au
        disposition = 'content-disposition'
        audio.add_header('Content-Disposition', 'attachment',
                         filename='audiotest.au')
        self.assertEqual(audio[disposition],
                         'attachment; filename="audiotest.au"')
        self.assertEqual(audio.get_params(header=disposition),
                         [('attachment', ''), ('filename', 'audiotest.au')])
        self.assertEqual(audio.get_param('filename', header=disposition),
                         'audiotest.au')
        self.assertEqual(audio.get_param('attachment', header=disposition),
                         '')
        # A unique object, so identity proves the failobj itself came back.
        sentinel = []
        self.assertIs(
            audio.get_param('foo', failobj=sentinel, header=disposition),
            sentinel)
        # Try some missing stuff
        self.assertIs(audio.get_param('foobar', sentinel), sentinel)
        self.assertIs(
            audio.get_param('attachment', sentinel, header='foobar'),
            sentinel)
1093
1094
Ezio Melottib3aedd42010-11-20 19:04:17 +00001095
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001096# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    def setUp(self):
        # Keep the raw bytes so test_encoding can compare them with the
        # decoded payload.
        with openfile('PyBanner048.gif', 'rb') as stream:
            raw = stream.read()
        self._imgdata = raw
        self._im = MIMEImage(raw)

    def test_guess_minor_type(self):
        self.assertEqual(self._im.get_content_type(), 'image/gif')

    def test_encoding(self):
        # The payload is expected to be base64 text that decodes back to
        # the original bytes.
        encoded = self._im.get_payload().encode('ascii')
        self.assertEqual(base64.decodebytes(encoded), self._imgdata)

    def test_checkSetMinor(self):
        image = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(image.get_content_type(), 'image/fish')

    def test_add_header(self):
        image = self._im
        disposition = 'content-disposition'
        image.add_header('Content-Disposition', 'attachment',
                         filename='dingusfish.gif')
        self.assertEqual(image[disposition],
                         'attachment; filename="dingusfish.gif"')
        self.assertEqual(image.get_params(header=disposition),
                         [('attachment', ''), ('filename', 'dingusfish.gif')])
        self.assertEqual(image.get_param('filename', header=disposition),
                         'dingusfish.gif')
        self.assertEqual(image.get_param('attachment', header=disposition),
                         '')
        # A unique object, so identity proves the failobj itself came back.
        sentinel = []
        self.assertIs(
            image.get_param('foo', failobj=sentinel, header=disposition),
            sentinel)
        # Try some missing stuff
        self.assertIs(image.get_param('foobar', sentinel), sentinel)
        self.assertIs(
            image.get_param('attachment', sentinel, header='foobar'),
            sentinel)
1134
1135
Ezio Melottib3aedd42010-11-20 19:04:17 +00001136
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001137# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    # Six bytes that are not valid ASCII, used by both tests.
    _BYTES = b'\xfa\xfb\xfc\xfd\xfe\xff'

    def test_headers(self):
        part = MIMEApplication(self._BYTES)
        self.assertEqual(part.get_content_type(), 'application/octet-stream')
        self.assertEqual(part['content-transfer-encoding'], 'base64')

    def test_body(self):
        part = MIMEApplication(self._BYTES)
        # whitespace in the cte encoded block is RFC-irrelevant.
        self.assertEqual(part.get_payload().strip(), '+vv8/f7/')
        self.assertEqual(part.get_payload(decode=True), self._BYTES)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001152
1153
Ezio Melottib3aedd42010-11-20 19:04:17 +00001154
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001155# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    def setUp(self):
        self._msg = MIMEText('hello there')

    def test_types(self):
        text = self._msg
        self.assertEqual(text.get_content_type(), 'text/plain')
        self.assertEqual(text.get_param('charset'), 'us-ascii')
        # A unique object, so identity proves the failobj itself came back.
        sentinel = []
        self.assertIs(text.get_param('foobar', sentinel), sentinel)
        self.assertIs(text.get_param('charset', sentinel, header='foobar'),
                      sentinel)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        text = MIMEText('hello there', _charset='us-ascii')
        self.assertEqual(text.get_charset().input_charset, 'us-ascii')
        self.assertEqual(text['content-type'],
                         'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        text = MIMEText('hello there', _charset='us-ascii')
        self.assertEqual(text.get_charset().input_charset, 'us-ascii')
        self.assertEqual(text['content-type'],
                         'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        text = MIMEText('hello there')
        # The charset object is compared directly with a plain string here.
        self.assertEqual(text.get_charset(), 'us-ascii')
        self.assertEqual(text['content-type'],
                         'text/plain; charset="us-ascii"')
        self.assertIn('hello there', text.as_string())

    def test_utf8_input(self):
        cyrillic = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        text = MIMEText(cyrillic, _charset='utf-8')
        self.assertEqual(text.get_charset().output_charset, 'utf-8')
        self.assertEqual(text['content-type'], 'text/plain; charset="utf-8"')
        self.assertEqual(text.get_payload(decode=True),
                         cyrillic.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
                   "will fix in email6")
    def test_utf8_input_no_charset(self):
        cyrillic = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, cyrillic)
1206
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001207
Ezio Melottib3aedd42010-11-20 19:04:17 +00001208
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001209# Test complicated multipart/* messages
class TestMultipart(TestEmailBase):
    # Header block shared by every flattened "outer" multipart/mixed
    # container built by _make_outer() once its boundary is 'BOUNDARY'.
    _OUTER_HEADERS = (
        'Content-Type: multipart/mixed; boundary="BOUNDARY"\n'
        'MIME-Version: 1.0\n'
        'Subject: A subject\n'
        'To: aperson@dom.ain\n'
        'From: bperson@dom.ain\n'
    )
    # A single text/plain 'hello world' sub-part followed by the closing
    # boundary, without a trailing newline.
    _HELLO_PART = (
        '--BOUNDARY\n'
        'Content-Type: text/plain; charset="us-ascii"\n'
        'MIME-Version: 1.0\n'
        'Content-Transfer-Encoding: 7bit\n'
        '\n'
        'hello world\n'
        '--BOUNDARY--'
    )

    def setUp(self):
        # Build a multipart/mixed container holding a short text intro and
        # a GIF attachment; the parts are kept on self for identity checks.
        with openfile('PyBanner048.gif', 'rb') as fp:
            data = fp.read()
        container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
        image = MIMEImage(data, name='dingusfish.gif')
        image.add_header('content-disposition', 'attachment',
                         filename='dingusfish.gif')
        intro = MIMEText('Hi there,\n\nThis is the dingus fish.\n')
        container.attach(intro)
        container.attach(image)
        container['From'] = 'Barry <barry@digicool.com>'
        container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
        container['Subject'] = 'Here is your dingus fish'
        # Let email.utils build the local-time Date value.  The previous
        # hand-rolled '%04d' % (tzsecs / 36) offset produced malformed
        # values such as '+-100' for zones east of UTC, was wrong for
        # offsets that are not whole hours, and took its day and month
        # names from the current locale via strftime().
        now = 987809702.54848599
        container['Date'] = utils.formatdate(now, localtime=True)
        self._msg = container
        self._im = image
        self._txt = intro

    @staticmethod
    def _make_outer():
        # Return an empty multipart/mixed container carrying the three
        # headers the flattening tests expect, in Subject/To/From order.
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        return outer

    def _check_hello(self, expected, **attributes):
        # Build an outer container, apply preamble/epilogue overrides given
        # as keyword arguments, attach one 'hello world' text part and
        # compare the flattened message with *expected*.
        outer = self._make_outer()
        for name, value in attributes.items():
            setattr(outer, name, value)
        outer.attach(MIMEText('hello world'))
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), expected)

    def test_hierarchy(self):
        container = self._msg
        self.assertTrue(container.is_multipart())
        self.assertEqual(container.get_content_type(), 'multipart/mixed')
        self.assertEqual(len(container.get_payload()), 2)
        # Only indexes 0 and 1 exist.
        self.assertRaises(IndexError, container.get_payload, 2)
        text_part = container.get_payload(0)
        image_part = container.get_payload(1)
        # The very objects attached in setUp() must come back.
        self.assertIs(text_part, self._txt)
        self.assertIs(image_part, self._im)
        self.assertEqual(container.get_payload(), [text_part, image_part])
        self.assertFalse(text_part.is_multipart())
        self.assertFalse(image_part.is_multipart())

    def test_empty_multipart_idempotent(self):
        # Parsing and re-flattening this text must give it back unchanged.
        text = (self._OUTER_HEADERS +
                '\n'
                '\n'
                '--BOUNDARY\n'
                '\n'
                '\n'
                '--BOUNDARY--\n')
        msg = Parser().parsestr(text)
        self.ndiffAssertEqual(text, msg.as_string())

    def test_no_parts_in_a_multipart_with_none_epilogue(self):
        outer = self._make_outer()
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(
            outer.as_string(),
            self._OUTER_HEADERS + '\n--BOUNDARY\n\n--BOUNDARY--')

    def test_no_parts_in_a_multipart_with_empty_epilogue(self):
        outer = self._make_outer()
        outer.preamble = ''
        outer.epilogue = ''
        outer.set_boundary('BOUNDARY')
        # Compared with the None case, one extra blank line is expected
        # before the first boundary and a newline after the last.
        self.ndiffAssertEqual(
            outer.as_string(),
            self._OUTER_HEADERS + '\n\n--BOUNDARY\n\n--BOUNDARY--\n')

    def test_one_part_in_a_multipart(self):
        self._check_hello(self._OUTER_HEADERS + '\n' + self._HELLO_PART)

    def test_seq_parts_in_a_multipart_with_empty_preamble(self):
        # An empty-string preamble is expected to add one blank line.
        self._check_hello(self._OUTER_HEADERS + '\n\n' + self._HELLO_PART,
                          preamble='')

    def test_seq_parts_in_a_multipart_with_none_preamble(self):
        self._check_hello(self._OUTER_HEADERS + '\n' + self._HELLO_PART,
                          preamble=None)

    def test_seq_parts_in_a_multipart_with_none_epilogue(self):
        self._check_hello(self._OUTER_HEADERS + '\n' + self._HELLO_PART,
                          epilogue=None)

    def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
        # An empty-string epilogue is expected to add a newline after the
        # closing boundary.
        self._check_hello(
            self._OUTER_HEADERS + '\n' + self._HELLO_PART + '\n',
            epilogue='')

    def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
        self._check_hello(
            self._OUTER_HEADERS + '\n' + self._HELLO_PART + '\n\n',
            epilogue='\n')

    def test_message_external_body(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_36.txt')
        eq(len(msg.get_payload()), 2)
        alternative = msg.get_payload(1)
        eq(alternative.get_content_type(), 'multipart/alternative')
        eq(len(alternative.get_payload()), 2)
        # Each alternative wraps exactly one text/plain message.
        for subpart in alternative.get_payload():
            eq(subpart.get_content_type(), 'message/external-body')
            eq(len(subpart.get_payload()), 1)
            eq(subpart.get_payload(0).get_content_type(), 'text/plain')

    def test_double_boundary(self):
        # msg_37.txt is a multipart that contains two dash-boundary's in a
        # row.  Our interpretation of RFC 2046 calls for ignoring the second
        # and subsequent boundaries.
        msg = self._msgobj('msg_37.txt')
        self.assertEqual(len(msg.get_payload()), 3)

    def test_nested_inner_contains_outer_boundary(self):
        # msg_38.txt has an inner part that contains outer boundaries.  My
        # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
        # these are illegal and should be interpreted as unterminated inner
        # parts.
        msg = self._msgobj('msg_38.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): the nesting below (4 spaces per level) was
        # reconstructed from a whitespace-collapsed listing -- confirm
        # against the repository copy.
        self.ndiffAssertEqual(sfp.getvalue(), (
            'multipart/mixed\n'
            '    multipart/mixed\n'
            '        multipart/alternative\n'
            '            text/plain\n'
            '        text/plain\n'
            '    text/plain\n'
            '    text/plain\n'))

    def test_nested_with_same_boundary(self):
        # msg 39.txt is similarly evil in that it's got inner parts that use
        # the same boundary as outer parts.  Again, I believe the way this is
        # parsed is closest to the spirit of RFC 2046
        msg = self._msgobj('msg_39.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): the nesting below (4 spaces per level) was
        # reconstructed from a whitespace-collapsed listing -- confirm
        # against the repository copy.
        self.ndiffAssertEqual(sfp.getvalue(), (
            'multipart/mixed\n'
            '    multipart/mixed\n'
            '        multipart/alternative\n'
            '        application/octet-stream\n'
            '        application/octet-stream\n'
            '    text/plain\n'))

    def test_boundary_in_non_multipart(self):
        # A boundary parameter on a text/html message is expected to be
        # left alone, so the message flattens back to the same text.
        msg = self._msgobj('msg_40.txt')
        self.assertEqual(msg.as_string(), (
            'MIME-Version: 1.0\n'
            'Content-Type: text/html; boundary="--961284236552522269"\n'
            '\n'
            '----961284236552522269\n'
            'Content-Type: text/html;\n'
            'Content-Transfer-Encoding: 7Bit\n'
            '\n'
            '<html></html>\n'
            '\n'
            '----961284236552522269--\n'))

    def test_boundary_with_leading_space(self):
        # The boundary starts with white space, which must be preserved.
        # NOTE(review): the number of leading spaces was taken from a
        # whitespace-collapsed listing -- confirm against the repository
        # copy.  The test is self-consistent for any count.
        boundary = ' XXXX'
        msg = email.message_from_string(
            'MIME-Version: 1.0\n'
            'Content-Type: multipart/mixed; boundary="' + boundary + '"\n'
            '\n'
            '--' + boundary + '\n'
            'Content-Type: text/plain\n'
            '\n'
            '\n'
            '--' + boundary + '\n'
            'Content-Type: text/plain\n'
            '\n'
            '--' + boundary + '--\n')
        self.assertTrue(msg.is_multipart())
        self.assertEqual(msg.get_boundary(), boundary)
        self.assertEqual(len(msg.get_payload()), 2)

    def test_boundary_without_trailing_newline(self):
        # The closing boundary is the very last thing in the text, with no
        # newline after it.
        m = Parser().parsestr(
            'Content-Type: multipart/mixed; '
            'boundary="===============0012394164=="\n'
            'MIME-Version: 1.0\n'
            '\n'
            '--===============0012394164==\n'
            'Content-Type: image/file1.jpg\n'
            'MIME-Version: 1.0\n'
            'Content-Transfer-Encoding: base64\n'
            '\n'
            'YXNkZg==\n'
            '--===============0012394164==--')
        self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001581
1582
Ezio Melottib3aedd42010-11-20 19:04:17 +00001583
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001584# Test some badly formatted messages
class TestNonConformant(TestEmailBase):
    # Every test here hands the parser (or the model) input that breaks the
    # RFCs in some way and checks either the defects that get recorded or
    # the content that is recovered.  Type checks use assertIsInstance so a
    # failure names the offending object instead of "False is not true".

    def test_parse_missing_minor_type(self):
        # msg_14.txt presumably has a Content-Type without a subtype (going
        # by the test name); the accessors must fall back to text/plain.
        msg = self._msgobj('msg_14.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')
        self.assertEqual(msg.get_content_maintype(), 'text')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_same_boundary_inner_outer(self):
        msg = self._msgobj('msg_15.txt')
        # XXX We can probably eventually do better
        inner = msg.get_payload(0)
        self.assertTrue(hasattr(inner, 'defects'))
        self.assertEqual(len(inner.defects), 1)
        self.assertIsInstance(inner.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    def test_multipart_no_boundary(self):
        msg = self._msgobj('msg_25.txt')
        # With no usable boundary the body is kept as one string, and both
        # problems are reported, in this order.
        self.assertIsInstance(msg.get_payload(), str)
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    def test_invalid_content_type(self):
        eq = self.assertEqual
        msg = Message()
        # RFC 2045, $5.2 says invalid yields text/plain
        msg['Content-Type'] = 'text'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Clear the old value and try something /really/ invalid
        del msg['content-type']
        msg['Content-Type'] = 'foo'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Still, make sure that the message is idempotently generated:
        # the bogus header value is written out untouched.
        s = StringIO()
        g = Generator(s)
        g.flatten(msg)
        self.ndiffAssertEqual(s.getvalue(), 'Content-Type: foo\n\n')

    def test_no_start_boundary(self):
        # The whole body, boundary lines included, is kept as the payload.
        msg = self._msgobj('msg_31.txt')
        self.ndiffAssertEqual(msg.get_payload(), """\
--BOUNDARY
Content-Type: text/plain

message 1

--BOUNDARY
Content-Type: text/plain

message 2

--BOUNDARY--
""")

    def test_no_separating_blank_line(self):
        # Flattening inserts the blank line between headers and body.
        msg = self._msgobj('msg_35.txt')
        self.ndiffAssertEqual(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: here's something interesting

counter to RFC 2822, there's no separating newline here
""")

    def test_lying_multipart(self):
        msg = self._msgobj('msg_41.txt')
        self.assertTrue(hasattr(msg, 'defects'))
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    def test_missing_start_boundary(self):
        outer = self._msgobj('msg_42.txt')
        # The message structure is:
        #
        # multipart/mixed
        #    text/plain
        #    message/rfc822
        #        multipart/mixed [*]
        #
        # [*] This message is missing its start boundary
        bad = outer.get_payload(1).get_payload(0)
        self.assertEqual(len(bad.defects), 1)
        self.assertIsInstance(bad.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    def test_first_line_is_continuation_header(self):
        eq = self.assertEqual
        m = ' Line 1\nLine 2\nLine 3'
        msg = email.message_from_string(m)
        # The leading continuation line yields no header; it is recorded on
        # the defect, and the remaining lines become the payload.
        eq(msg.keys(), [])
        eq(msg.get_payload(), 'Line 2\nLine 3')
        eq(len(msg.defects), 1)
        self.assertIsInstance(msg.defects[0],
                              errors.FirstHeaderLineIsContinuationDefect)
        eq(msg.defects[0].line, ' Line 1\n')
1695
1696
Ezio Melottib3aedd42010-11-20 19:04:17 +00001697
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001698# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    # decode_header() returns a list of (bytes-or-str, charset) chunks;
    # make_header() turns such a list back into a Header.

    def test_rfc2047_multiline(self):
        raw = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
        chunks = decode_header(raw)
        self.assertEqual(chunks, [
            (b'Re:', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            (b'baz foo bar', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
        rebuilt = make_header(chunks)
        self.assertEqual(
            str(rebuilt),
            'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        # Re-encoding at 76 columns has to split the last encoded word.
        self.ndiffAssertEqual(rebuilt.encode(maxlinelen=76), """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""")

    def test_whitespace_eater_unicode(self):
        raw = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        chunks = decode_header(raw)
        self.assertEqual(chunks, [(b'Andr\xe9', 'iso-8859-1'),
                                  (b'Pirard <pirard@dom.ain>', None)])
        as_text = str(make_header(chunks))
        self.assertEqual(as_text, 'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_eater_unicode_2(self):
        raw = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?='
        chunks = decode_header(raw)
        self.assertEqual(chunks, [
            (b'The', None), (b'quick brown fox', 'iso-8859-1'),
            (b'jumped over the', None), (b'lazy dog', 'iso-8859-1')])
        as_text = str(make_header(chunks))
        self.assertEqual(as_text,
                         'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        # Encoded words glued to surrounding text are not decoded at all.
        raw = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        self.assertEqual(decode_header(raw), [(raw, None)])

    def test_rfc2047_with_whitespace(self):
        raw = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        self.assertEqual(decode_header(raw), [
            (b'Sm', None), (b'\xf6', 'iso-8859-1'),
            (b'rg', None), (b'\xe5', 'iso-8859-1'),
            (b'sbord', None)])

    def test_rfc2047_B_bad_padding(self):
        template = '=?iso-8859-1?B?%s?='
        cases = [       # only test complete bytes
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi')
            ]
        for encoded, expected in cases:
            decoded = decode_header(template % encoded)
            self.assertEqual(decoded, [(expected, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004.
        raw = '=?iso-8659-1?Q?andr=e9=zz?='
        self.assertEqual(decode_header(raw),
                         [(b'andr\xe9=zz', 'iso-8659-1')])
1762
Ezio Melottib3aedd42010-11-20 19:04:17 +00001763
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001764# Test the MIMEMessage class
class TestMIMEMessage(TestEmailBase):
    # Covers message/* containers: building them with MIMEMessage, parsing
    # them back, and how default content types behave inside containers.
    # Type and identity checks use assertIsInstance/assertIs so a failure
    # names the offending object instead of "False is not true".

    def setUp(self):
        with openfile('msg_11.txt') as fp:
            self._text = fp.read()

    def test_type_error(self):
        # MIMEMessage only wraps Message objects, never plain strings.
        self.assertRaises(TypeError, MIMEMessage, 'a plain string')

    def test_valid_argument(self):
        eq = self.assertEqual
        subject = 'A sub-message'
        m = Message()
        m['Subject'] = subject
        r = MIMEMessage(m)
        eq(r.get_content_type(), 'message/rfc822')
        payload = r.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subpart = payload[0]
        # The wrapped message is stored itself, not a copy of it.
        self.assertIs(subpart, m)
        eq(subpart['subject'], subject)

    def test_bad_multipart(self):
        msg1 = Message()
        msg1['Subject'] = 'subpart 1'
        msg2 = Message()
        msg2['Subject'] = 'subpart 2'
        r = MIMEMessage(msg1)
        # A MIMEMessage already holds its one message; attaching more fails.
        self.assertRaises(errors.MultipartConversionError, r.attach, msg2)

    def test_generate(self):
        # First craft the message to be encapsulated
        m = Message()
        m['Subject'] = 'An enclosed message'
        m.set_payload('Here is the body of the message.\n')
        r = MIMEMessage(m)
        r['Subject'] = 'The enclosing message'
        s = StringIO()
        g = Generator(s)
        g.flatten(r)
        self.assertEqual(s.getvalue(), """\
Content-Type: message/rfc822
MIME-Version: 1.0
Subject: The enclosing message

Subject: An enclosed message

Here is the body of the message.
""")

    def test_parse_message_rfc822(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_11.txt')
        eq(msg.get_content_type(), 'message/rfc822')
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        submsg = payload[0]
        self.assertIsInstance(submsg, Message)
        eq(submsg['subject'], 'An enclosed message')
        eq(submsg.get_payload(), 'Here is the body of the message.\n')

    def test_dsn(self):
        eq = self.assertEqual
        # msg 16 is a Delivery Status Notification, see RFC 1894
        msg = self._msgobj('msg_16.txt')
        eq(msg.get_content_type(), 'multipart/report')
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 3)
        # Subpart 1 is a text/plain, human readable section
        # NOTE(review): the leading indentation of the indented lines in
        # the expected text may have been collapsed in this copy of the
        # file -- confirm against msg_16.txt.
        subpart = msg.get_payload(0)
        eq(subpart.get_content_type(), 'text/plain')
        eq(subpart.get_payload(), """\
This report relates to a message you sent with the following header fields:

 Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
 Date: Sun, 23 Sep 2001 20:10:55 -0700
 From: "Ian T. Henry" <henryi@oxy.edu>
 To: SoCal Raves <scr@socal-raves.org>
 Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

 Recipient address: jangel1@cougar.noc.ucla.edu
 Reason: recipient reached disk quota

""")
        # Subpart 2 contains the machine parsable DSN information.  It
        # consists of two blocks of headers, represented by two nested Message
        # objects.
        subpart = msg.get_payload(1)
        eq(subpart.get_content_type(), 'message/delivery-status')
        eq(len(subpart.get_payload()), 2)
        # message/delivery-status should treat each block as a bunch of
        # headers, i.e. a bunch of Message objects.
        dsn1 = subpart.get_payload(0)
        self.assertIsInstance(dsn1, Message)
        eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
        eq(dsn1.get_param('dns', header='reporting-mta'), '')
        # Try a missing one <wink>
        eq(dsn1.get_param('nsd', header='reporting-mta'), None)
        dsn2 = subpart.get_payload(1)
        self.assertIsInstance(dsn2, Message)
        eq(dsn2['action'], 'failed')
        eq(dsn2.get_params(header='original-recipient'),
           [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
        eq(dsn2.get_param('rfc822', header='final-recipient'), '')
        # Subpart 3 is the original message
        subpart = msg.get_payload(2)
        eq(subpart.get_content_type(), 'message/rfc822')
        payload = subpart.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subsubpart = payload[0]
        self.assertIsInstance(subsubpart, Message)
        eq(subsubpart.get_content_type(), 'text/plain')
        eq(subsubpart['message-id'],
           '<002001c144a6$8752e060$56104586@oxy.edu>')

    def test_epilogue(self):
        # A message built by hand with preamble and epilogue must flatten
        # to exactly the text stored in msg_21.txt.
        with openfile('msg_21.txt') as fp:
            text = fp.read()
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = 'End of MIME message\n'
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        self.ndiffAssertEqual(sfp.getvalue(), text)

    def test_no_nl_preamble(self):
        # The preamble has no trailing newline; the first boundary must
        # nevertheless start on a line of its own.
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = ''
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        self.ndiffAssertEqual(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
""")

    def test_default_type(self):
        eq = self.assertEqual
        with openfile('msg_30.txt') as fp:
            msg = email.message_from_file(fp)
        # Both direct subparts default to (and report) message/rfc822 ...
        container1 = msg.get_payload(0)
        eq(container1.get_default_type(), 'message/rfc822')
        eq(container1.get_content_type(), 'message/rfc822')
        container2 = msg.get_payload(1)
        eq(container2.get_default_type(), 'message/rfc822')
        eq(container2.get_content_type(), 'message/rfc822')
        # ... while the messages inside them are plain text again.
        container1a = container1.get_payload(0)
        eq(container1a.get_default_type(), 'text/plain')
        eq(container1a.get_content_type(), 'text/plain')
        container2a = container2.get_payload(0)
        eq(container2a.get_default_type(), 'text/plain')
        eq(container2a.get_content_type(), 'text/plain')

    def test_default_type_with_explicit_container_type(self):
        eq = self.assertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        container1 = msg.get_payload(0)
        eq(container1.get_default_type(), 'message/rfc822')
        eq(container1.get_content_type(), 'message/rfc822')
        container2 = msg.get_payload(1)
        eq(container2.get_default_type(), 'message/rfc822')
        eq(container2.get_content_type(), 'message/rfc822')
        container1a = container1.get_payload(0)
        eq(container1a.get_default_type(), 'text/plain')
        eq(container1a.get_content_type(), 'text/plain')
        container2a = container2.get_payload(0)
        eq(container2a.get_default_type(), 'text/plain')
        eq(container2a.get_content_type(), 'text/plain')

    def test_default_type_non_parsed(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # Set up container
        container = MIMEMultipart('digest', 'BOUNDARY')
        container.epilogue = ''
        # Set up subparts
        subpart1a = MIMEText('message 1\n')
        subpart2a = MIMEText('message 2\n')
        subpart1 = MIMEMessage(subpart1a)
        subpart2 = MIMEMessage(subpart2a)
        container.attach(subpart1)
        container.attach(subpart2)
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
        # Strip the explicit headers: the subparts must still report
        # message/rfc822, and flatten with empty header blocks.
        del subpart1['content-type']
        del subpart1['mime-version']
        del subpart2['content-type']
        del subpart2['mime-version']
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')

    def test_mime_attachments_in_constructor(self):
        eq = self.assertEqual
        text1 = MIMEText('')
        text2 = MIMEText('')
        msg = MIMEMultipart(_subparts=(text1, text2))
        eq(len(msg.get_payload()), 2)
        eq(msg.get_payload(0), text1)
        eq(msg.get_payload(1), text2)

    def test_default_multipart_constructor(self):
        msg = MIMEMultipart()
        self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002063
Ezio Melottib3aedd42010-11-20 19:04:17 +00002064
# A general test of parser->model->generator idempotency.  IOW, read a message
# in, parse it into a message object tree, then without touching the tree,
# regenerate the plain text.  The original text and the transformed text
# should be identical.  Note that the Unix-From line is left out unless a test
# asks for it, since it may contain a changed date.
class TestIdempotent(TestEmailBase):
    # Parse a stored message and flatten it again without touching the
    # object tree; the regenerated text has to equal the original text.

    # Line separator expected in parsed payloads; kept as a class attribute
    # so the string expectations below are written in terms of it.
    linesep = '\n'

    def _msgobj(self, filename):
        # Unlike the base class helper, return the raw text as well, so the
        # caller can compare it with the regenerated output.
        with openfile(filename) as fp:
            data = fp.read()
        msg = email.message_from_string(data)
        return msg, data

    def _idempotent(self, msg, text, unixfrom=False):
        # maxheaderlen=0 keeps the generator from re-wrapping long headers.
        s = StringIO()
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg, unixfrom=unixfrom)
        self.ndiffAssertEqual(text, s.getvalue())

    def test_parse_text_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_01.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_params()[1], ('charset', 'us-ascii'))
        eq(msg.get_param('charset'), 'us-ascii')
        self.assertIsNone(msg.preamble)
        self.assertIsNone(msg.epilogue)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        msg, text = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')
        self.assertIsNone(msg.get_params())
        self.assertIsNone(msg.get_param('charset'))
        self._idempotent(msg, text)

    def test_simple_multipart(self):
        msg, text = self._msgobj('msg_04.txt')
        self._idempotent(msg, text)

    def test_MIME_digest(self):
        msg, text = self._msgobj('msg_02.txt')
        self._idempotent(msg, text)

    def test_long_header(self):
        msg, text = self._msgobj('msg_27.txt')
        self._idempotent(msg, text)

    def test_MIME_digest_with_part_headers(self):
        msg, text = self._msgobj('msg_28.txt')
        self._idempotent(msg, text)

    def test_mixed_with_image(self):
        msg, text = self._msgobj('msg_06.txt')
        self._idempotent(msg, text)

    def test_multipart_report(self):
        msg, text = self._msgobj('msg_05.txt')
        self._idempotent(msg, text)

    def test_dsn(self):
        msg, text = self._msgobj('msg_16.txt')
        self._idempotent(msg, text)

    def test_preamble_epilogue(self):
        msg, text = self._msgobj('msg_21.txt')
        self._idempotent(msg, text)

    def test_multipart_one_part(self):
        msg, text = self._msgobj('msg_23.txt')
        self._idempotent(msg, text)

    def test_multipart_no_parts(self):
        msg, text = self._msgobj('msg_24.txt')
        self._idempotent(msg, text)

    def test_no_start_boundary(self):
        msg, text = self._msgobj('msg_31.txt')
        self._idempotent(msg, text)

    def test_rfc2231_charset(self):
        msg, text = self._msgobj('msg_32.txt')
        self._idempotent(msg, text)

    def test_more_rfc2231_parameters(self):
        msg, text = self._msgobj('msg_33.txt')
        self._idempotent(msg, text)

    def test_text_plain_in_a_multipart_digest(self):
        msg, text = self._msgobj('msg_34.txt')
        self._idempotent(msg, text)

    def test_nested_multipart_mixeds(self):
        msg, text = self._msgobj('msg_12a.txt')
        self._idempotent(msg, text)

    def test_message_external_body_idempotent(self):
        msg, text = self._msgobj('msg_36.txt')
        self._idempotent(msg, text)

    def test_message_delivery_status(self):
        # The only case here that also regenerates the Unix-From line.
        msg, text = self._msgobj('msg_43.txt')
        self._idempotent(msg, text, unixfrom=True)

    def test_message_signed_idempotent(self):
        msg, text = self._msgobj('msg_45.txt')
        self._idempotent(msg, text)

    def test_content_type(self):
        eq = self.assertEqual
        # Only the parsed message is needed here, not the raw text.
        msg, _ = self._msgobj('msg_05.txt')
        eq(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(msg.get_params())
        eq(params['report-type'], 'delivery-status')
        eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep)
        eq(msg.epilogue, self.linesep)
        eq(len(msg.get_payload()), 3)
        # Make sure the subparts are what we expect
        msg1 = msg.get_payload(0)
        eq(msg1.get_content_type(), 'text/plain')
        eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg2 = msg.get_payload(1)
        eq(msg2.get_content_type(), 'text/plain')
        eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg3 = msg.get_payload(2)
        eq(msg3.get_content_type(), 'message/rfc822')
        self.assertIsInstance(msg3, Message)
        payload = msg3.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg4 = payload[0]
        self.assertIsInstance(msg4, Message)
        eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep)

    def test_parser(self):
        eq = self.assertEqual
        msg, _ = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        eq(msg.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and that
        # that submessage has a type of text/plain
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg1 = payload[0]
        self.assertIsInstance(msg1, Message)
        eq(msg1.get_content_type(), 'text/plain')
        self.assertIsInstance(msg1.get_payload(), str)
        eq(msg1.get_payload(), self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002227
2228
Ezio Melottib3aedd42010-11-20 19:04:17 +00002229
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002230# Test various other bits of the package's functionality
2231class TestMiscellaneous(TestEmailBase):
def test_message_from_string(self):
    # Parsing the text of msg_01.txt and flattening it again must give the
    # original text back.
    with openfile('msg_01.txt') as fp:
        original = fp.read()
    parsed = email.message_from_string(original)
    flattened = StringIO()
    # maxheaderlen=0: long headers must not be wrapped/continued, or the
    # idempotency comparison below would be meaningless.
    Generator(flattened, maxheaderlen=0).flatten(parsed)
    self.assertEqual(original, flattened.getvalue())
2242
def test_message_from_file(self):
    # Same round trip as the string variant, but the parser reads straight
    # from the open file object.
    with openfile('msg_01.txt') as fp:
        original = fp.read()
        fp.seek(0)      # rewind so the parser sees the whole file again
        parsed = email.message_from_file(fp)
    flattened = StringIO()
    # maxheaderlen=0: long headers must not be wrapped/continued, or the
    # idempotency comparison below would be meaningless.
    Generator(flattened, maxheaderlen=0).flatten(parsed)
    self.assertEqual(original, flattened.getvalue())
2254
def test_message_from_string_with_class(self):
    # message_from_string() must build the tree out of the class it is
    # given.  assertIsInstance is used so that a failure reports the
    # offending object rather than "False is not true".
    with openfile('msg_01.txt') as fp:
        text = fp.read()

    # Create a subclass
    class MyMessage(Message):
        pass

    msg = email.message_from_string(text, MyMessage)
    self.assertIsInstance(msg, MyMessage)
    # Try something more complicated: every node in the tree, not just
    # the root, has to be of the requested class.
    with openfile('msg_02.txt') as fp:
        text = fp.read()
    msg = email.message_from_string(text, MyMessage)
    for subpart in msg.walk():
        self.assertIsInstance(subpart, MyMessage)
2272
def test_message_from_file_with_class(self):
    # message_from_file() must build the tree out of the class it is
    # given.  assertIsInstance is used so that a failure reports the
    # offending object rather than "False is not true".

    # Create a subclass
    class MyMessage(Message):
        pass

    with openfile('msg_01.txt') as fp:
        msg = email.message_from_file(fp, MyMessage)
    self.assertIsInstance(msg, MyMessage)
    # Try something more complicated: every node in the tree, not just
    # the root, has to be of the requested class.
    with openfile('msg_02.txt') as fp:
        msg = email.message_from_file(fp, MyMessage)
    for subpart in msg.walk():
        self.assertIsInstance(subpart, MyMessage)
2287
def test__all__(self):
    # The email package must export exactly these names.  sorted() returns
    # a new list, so the package's own __all__ is left untouched and the
    # builtin all() is no longer shadowed by a local.
    module = __import__('email')
    self.assertEqual(sorted(module.__all__), [
        'base64mime', 'charset', 'encoders', 'errors', 'generator',
        'header', 'iterators', 'message', 'message_from_binary_file',
        'message_from_bytes', 'message_from_file',
        'message_from_string', 'mime', 'parser',
        'quoprimime', 'utils',
        ])
2300
2301 def test_formatdate(self):
2302 now = time.time()
2303 self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
2304 time.gmtime(now)[:6])
2305
2306 def test_formatdate_localtime(self):
2307 now = time.time()
2308 self.assertEqual(
2309 utils.parsedate(utils.formatdate(now, localtime=True))[:6],
2310 time.localtime(now)[:6])
2311
2312 def test_formatdate_usegmt(self):
2313 now = time.time()
2314 self.assertEqual(
2315 utils.formatdate(now, localtime=False),
2316 time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
2317 self.assertEqual(
2318 utils.formatdate(now, localtime=False, usegmt=True),
2319 time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
2320
2321 def test_parsedate_none(self):
2322 self.assertEqual(utils.parsedate(''), None)
2323
2324 def test_parsedate_compact(self):
2325 # The FWS after the comma is optional
2326 self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
2327 utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
2328
2329 def test_parsedate_no_dayofweek(self):
2330 eq = self.assertEqual
2331 eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
2332 (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
2333
2334 def test_parsedate_compact_no_dayofweek(self):
2335 eq = self.assertEqual
2336 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
2337 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2338
R. David Murray4a62e892010-12-23 20:35:46 +00002339 def test_parsedate_no_space_before_positive_offset(self):
2340 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
2341 (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
2342
2343 def test_parsedate_no_space_before_negative_offset(self):
2344 # Issue 1155362: we already handled '+' for this case.
2345 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
2346 (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))
2347
2348
R David Murrayaccd1c02011-03-13 20:06:23 -04002349 def test_parsedate_accepts_time_with_dots(self):
2350 eq = self.assertEqual
2351 eq(utils.parsedate_tz('5 Feb 2003 13.47.26 -0800'),
2352 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2353 eq(utils.parsedate_tz('5 Feb 2003 13.47 -0800'),
2354 (2003, 2, 5, 13, 47, 0, 0, 1, -1, -28800))
2355
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002356 def test_parsedate_acceptable_to_time_functions(self):
2357 eq = self.assertEqual
2358 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
2359 t = int(time.mktime(timetup))
2360 eq(time.localtime(t)[:6], timetup[:6])
2361 eq(int(time.strftime('%Y', timetup)), 2003)
2362 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
2363 t = int(time.mktime(timetup[:9]))
2364 eq(time.localtime(t)[:6], timetup[:6])
2365 eq(int(time.strftime('%Y', timetup[:9])), 2003)
2366
R. David Murray219d1c82010-08-25 00:45:55 +00002367 def test_parsedate_y2k(self):
2368 """Test for parsing a date with a two-digit year.
2369
2370 Parsing a date with a two-digit year should return the correct
2371 four-digit year. RFC822 allows two-digit years, but RFC2822 (which
2372 obsoletes RFC822) requires four-digit years.
2373
2374 """
2375 self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
2376 utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
2377 self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
2378 utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
2379
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002380 def test_parseaddr_empty(self):
2381 self.assertEqual(utils.parseaddr('<>'), ('', ''))
2382 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
2383
2384 def test_noquote_dump(self):
2385 self.assertEqual(
2386 utils.formataddr(('A Silly Person', 'person@dom.ain')),
2387 'A Silly Person <person@dom.ain>')
2388
2389 def test_escape_dump(self):
2390 self.assertEqual(
2391 utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
2392 r'"A \(Very\) Silly Person" <person@dom.ain>')
2393 a = r'A \(Special\) Person'
2394 b = 'person@dom.ain'
2395 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2396
2397 def test_escape_backslashes(self):
2398 self.assertEqual(
2399 utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
2400 r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
2401 a = r'Arthur \Backslash\ Foobar'
2402 b = 'person@dom.ain'
2403 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2404
2405 def test_name_with_dot(self):
2406 x = 'John X. Doe <jxd@example.com>'
2407 y = '"John X. Doe" <jxd@example.com>'
2408 a, b = ('John X. Doe', 'jxd@example.com')
2409 self.assertEqual(utils.parseaddr(x), (a, b))
2410 self.assertEqual(utils.parseaddr(y), (a, b))
2411 # formataddr() quotes the name if there's a dot in it
2412 self.assertEqual(utils.formataddr((a, b)), y)
2413
R. David Murray5397e862010-10-02 15:58:26 +00002414 def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
2415 # issue 10005. Note that in the third test the second pair of
2416 # backslashes is not actually a quoted pair because it is not inside a
2417 # comment or quoted string: the address being parsed has a quoted
2418 # string containing a quoted backslash, followed by 'example' and two
2419 # backslashes, followed by another quoted string containing a space and
2420 # the word 'example'. parseaddr copies those two backslashes
2421 # literally. Per rfc5322 this is not technically correct since a \ may
2422 # not appear in an address outside of a quoted string. It is probably
2423 # a sensible Postel interpretation, though.
2424 eq = self.assertEqual
2425 eq(utils.parseaddr('""example" example"@example.com'),
2426 ('', '""example" example"@example.com'))
2427 eq(utils.parseaddr('"\\"example\\" example"@example.com'),
2428 ('', '"\\"example\\" example"@example.com'))
2429 eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
2430 ('', '"\\\\"example\\\\" example"@example.com'))
2431
R. David Murray63563cd2010-12-18 18:25:38 +00002432 def test_parseaddr_preserves_spaces_in_local_part(self):
2433 # issue 9286. A normal RFC5322 local part should not contain any
2434 # folding white space, but legacy local parts can (they are a sequence
2435 # of atoms, not dotatoms). On the other hand we strip whitespace from
2436 # before the @ and around dots, on the assumption that the whitespace
2437 # around the punctuation is a mistake in what would otherwise be
2438 # an RFC5322 local part. Leading whitespace is, usual, stripped as well.
2439 self.assertEqual(('', "merwok wok@xample.com"),
2440 utils.parseaddr("merwok wok@xample.com"))
2441 self.assertEqual(('', "merwok wok@xample.com"),
2442 utils.parseaddr("merwok wok@xample.com"))
2443 self.assertEqual(('', "merwok wok@xample.com"),
2444 utils.parseaddr(" merwok wok @xample.com"))
2445 self.assertEqual(('', 'merwok"wok" wok@xample.com'),
2446 utils.parseaddr('merwok"wok" wok@xample.com'))
2447 self.assertEqual(('', 'merwok.wok.wok@xample.com'),
2448 utils.parseaddr('merwok. wok . wok@xample.com'))
2449
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002450 def test_multiline_from_comment(self):
2451 x = """\
2452Foo
2453\tBar <foo@example.com>"""
2454 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
2455
2456 def test_quote_dump(self):
2457 self.assertEqual(
2458 utils.formataddr(('A Silly; Person', 'person@dom.ain')),
2459 r'"A Silly; Person" <person@dom.ain>')
2460
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002461 def test_charset_richcomparisons(self):
2462 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002463 ne = self.assertNotEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002464 cset1 = Charset()
2465 cset2 = Charset()
2466 eq(cset1, 'us-ascii')
2467 eq(cset1, 'US-ASCII')
2468 eq(cset1, 'Us-AsCiI')
2469 eq('us-ascii', cset1)
2470 eq('US-ASCII', cset1)
2471 eq('Us-AsCiI', cset1)
2472 ne(cset1, 'usascii')
2473 ne(cset1, 'USASCII')
2474 ne(cset1, 'UsAsCiI')
2475 ne('usascii', cset1)
2476 ne('USASCII', cset1)
2477 ne('UsAsCiI', cset1)
2478 eq(cset1, cset2)
2479 eq(cset2, cset1)
2480
2481 def test_getaddresses(self):
2482 eq = self.assertEqual
2483 eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
2484 'Bud Person <bperson@dom.ain>']),
2485 [('Al Person', 'aperson@dom.ain'),
2486 ('Bud Person', 'bperson@dom.ain')])
2487
2488 def test_getaddresses_nasty(self):
2489 eq = self.assertEqual
2490 eq(utils.getaddresses(['foo: ;']), [('', '')])
2491 eq(utils.getaddresses(
2492 ['[]*-- =~$']),
2493 [('', ''), ('', ''), ('', '*--')])
2494 eq(utils.getaddresses(
2495 ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
2496 [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
2497
2498 def test_getaddresses_embedded_comment(self):
2499 """Test proper handling of a nested comment"""
2500 eq = self.assertEqual
2501 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
2502 eq(addrs[0][1], 'foo@bar.com')
2503
2504 def test_utils_quote_unquote(self):
2505 eq = self.assertEqual
2506 msg = Message()
2507 msg.add_header('content-disposition', 'attachment',
2508 filename='foo\\wacky"name')
2509 eq(msg.get_filename(), 'foo\\wacky"name')
2510
2511 def test_get_body_encoding_with_bogus_charset(self):
2512 charset = Charset('not a charset')
2513 self.assertEqual(charset.get_body_encoding(), 'base64')
2514
2515 def test_get_body_encoding_with_uppercase_charset(self):
2516 eq = self.assertEqual
2517 msg = Message()
2518 msg['Content-Type'] = 'text/plain; charset=UTF-8'
2519 eq(msg['content-type'], 'text/plain; charset=UTF-8')
2520 charsets = msg.get_charsets()
2521 eq(len(charsets), 1)
2522 eq(charsets[0], 'utf-8')
2523 charset = Charset(charsets[0])
2524 eq(charset.get_body_encoding(), 'base64')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00002525 msg.set_payload(b'hello world', charset=charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002526 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
2527 eq(msg.get_payload(decode=True), b'hello world')
2528 eq(msg['content-transfer-encoding'], 'base64')
2529 # Try another one
2530 msg = Message()
2531 msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
2532 charsets = msg.get_charsets()
2533 eq(len(charsets), 1)
2534 eq(charsets[0], 'us-ascii')
2535 charset = Charset(charsets[0])
2536 eq(charset.get_body_encoding(), encoders.encode_7or8bit)
2537 msg.set_payload('hello world', charset=charset)
2538 eq(msg.get_payload(), 'hello world')
2539 eq(msg['content-transfer-encoding'], '7bit')
2540
2541 def test_charsets_case_insensitive(self):
2542 lc = Charset('us-ascii')
2543 uc = Charset('US-ASCII')
2544 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
2545
2546 def test_partial_falls_inside_message_delivery_status(self):
# Parses the msg_43.txt sample and compares the MIME-type listing written by
# iterators._structure() with the expected text below.
# NOTE(review): presumably the leading whitespace of each expected line encodes
# nesting depth; the listing below is kept exactly as captured -- confirm the
# indentation against the upstream file before relying on it.
2547 eq = self.ndiffAssertEqual
2548 # The Parser interface provides chunks of data to FeedParser in 8192
2549 # byte gulps. SF bug #1076485 found one of those chunks inside
2550 # message/delivery-status header block, which triggered an
2551 # unreadline() of NeedMoreData.
2552 msg = self._msgobj('msg_43.txt')
2553 sfp = StringIO()
2554 iterators._structure(msg, sfp)
2555 eq(sfp.getvalue(), """\
2556multipart/report
2557 text/plain
2558 message/delivery-status
2559 text/plain
2560 text/plain
2561 text/plain
2562 text/plain
2563 text/plain
2564 text/plain
2565 text/plain
2566 text/plain
2567 text/plain
2568 text/plain
2569 text/plain
2570 text/plain
2571 text/plain
2572 text/plain
2573 text/plain
2574 text/plain
2575 text/plain
2576 text/plain
2577 text/plain
2578 text/plain
2579 text/plain
2580 text/plain
2581 text/plain
2582 text/plain
2583 text/plain
2584 text/plain
2585 text/rfc822-headers
2586""")
2587
R. David Murraya0b44b52010-12-02 21:47:19 +00002588 def test_make_msgid_domain(self):
2589 self.assertEqual(
2590 email.utils.make_msgid(domain='testdomain-string')[-19:],
2591 '@testdomain-string>')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002592
Ezio Melottib3aedd42010-11-20 19:04:17 +00002593
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002594# Test the iterator/generators
class TestIterators(TestEmailBase):
    # Tests for the helpers in email.iterators, plus the line buffering
    # (BufferedSubFile) used by the feed parser.

    def test_body_line_iterator(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # First a simple non-multipart message
        msg = self._msgobj('msg_01.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 6)
        neq(EMPTYSTRING.join(lines), msg.get_payload())
        # Now a more complicated multipart
        msg = self._msgobj('msg_02.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 43)
        with openfile('msg_19.txt') as fp:
            neq(EMPTYSTRING.join(lines), fp.read())

    def test_typed_subpart_iterator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_04.txt')
        # Collect the payload of every subpart whose main type is 'text'.
        payloads = [subpart.get_payload() for subpart in
                    iterators.typed_subpart_iterator(msg, 'text')]
        eq(len(payloads), 2)
        eq(EMPTYSTRING.join(payloads), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_03.txt')
        # Same as above, but restricted to the text/plain subtype.
        payloads = [subpart.get_payload() for subpart in
                    iterators.typed_subpart_iterator(msg, 'text', 'plain')]
        eq(len(payloads), 1)
        eq(EMPTYSTRING.join(payloads), """\

Hi,

Do you like this message?

-Me
""")

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
        line endings. A CR ending one push() followed by a LF starting
        the next push() added an empty line.
        '''
        # Each entry is (chunk to push, number of lines expected to become
        # readable right after that push).
        imt = [
            ("a\r \n", 2),
            ("b", 0),
            ("c\n", 1),
            ("", 0),
            ("d\r\n", 1),
            ("e\r", 0),
            ("\nf", 1),
            ("\r\n", 1),
            ]
        from email.feedparser import BufferedSubFile, NeedMoreData
        bsf = BufferedSubFile()
        om = []
        nt = 0
        for il, n in imt:
            bsf.push(il)
            nt += n
            n1 = 0
            while True:
                ol = bsf.readline()
                # NeedMoreData is a sentinel, so compare by identity.
                if ol is NeedMoreData:
                    break
                om.append(ol)
                n1 += 1
            # assertEqual (not assertTrue(a == b)) so that a failure
            # reports both values.
            self.assertEqual(n, n1)
        self.assertEqual(len(om), nt)
        # Nothing may be lost or invented: the lines read back must
        # concatenate to exactly what was pushed.
        self.assertEqual(''.join(il for il, n in imt), ''.join(om))
2681
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002682
Ezio Melottib3aedd42010-11-20 19:04:17 +00002683
class TestParsers(TestEmailBase):
    # Parser-level tests: header-only parsing, whitespace continuation
    # lines, CRLF handling and RFC 2822 header-name syntax.

    # Always show the complete diff when a long comparison fails.
    maxDiff = None

    def test_header_parser(self):
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt') as fp:
            msg = HeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        # Although the content type is multipart, the header-only parse
        # must leave the payload as one unparsed string.
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)

    def test_whitespace_continuation(self):
        eq = self.assertEqual
        # This message contains a line after the Subject: header that has only
        # whitespace, but it is not empty!
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: the next line has a space on it
\x20
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        eq = self.assertEqual
        # Like the previous test, but the subject line is the last
        # header.
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam
Subject: the next line has a space on it
\x20

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        eq = self.assertEqual
        # newline='\n' keeps the CRLF line endings of the sample intact.
        with openfile('msg_26.txt', newline='\n') as fp:
            msg = Parser().parse(fp)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'text/plain')
        eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'application/riscos')

    def test_crlf_flatten(self):
        # Using newline='\n' preserves the crlfs in this input file.
        with openfile('msg_26.txt', newline='\n') as fp:
            text = fp.read()
        msg = email.message_from_string(text)
        s = StringIO()
        g = Generator(s)
        g.flatten(msg, linesep='\r\n')
        # Flattening with CRLF must reproduce the input exactly.
        self.assertEqual(s.getvalue(), text)

    def test_multipart_digest_with_extra_mime_headers(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        eq(msg.is_multipart(), 1)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'message/rfc822')
        eq(part1.is_multipart(), 1)
        eq(len(part1.get_payload()), 1)
        part1a = part1.get_payload(0)
        eq(part1a.is_multipart(), 0)
        eq(part1a.get_content_type(), 'text/plain')
        neq(part1a.get_payload(), 'message 1\n')
        # next message/rfc822
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'message/rfc822')
        eq(part2.is_multipart(), 1)
        eq(len(part2.get_payload()), 1)
        part2a = part2.get_payload(0)
        eq(part2a.is_multipart(), 0)
        eq(part2a.get_content_type(), 'text/plain')
        neq(part2a.get_payload(), 'message 2\n')

    def test_three_lines(self):
        # A bug report by Andrew McNamara
        # (the From: address is left without its closing '>', and the text
        # has no trailing newline and no body)
        lines = ['From: Andrew Person <aperson@dom.ain',
                 'Subject: Test',
                 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
        msg = email.message_from_string(NL.join(lines))
        self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        eq = self.assertEqual
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        value1 = 'text'
        value2 = 'more text'
        m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            value1, value2)
        msg = email.message_from_string(m)
        eq(msg.get('Header'), value1)
        eq(msg.get('Next-Header'), value2)

    def test_rfc2822_header_syntax(self):
        eq = self.assertEqual
        # Unusual but printable characters are accepted in header names.
        m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg), 3)
        eq(sorted(field for field in msg), ['!"#QUX;~', '>From', 'From'])
        eq(msg.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        eq = self.assertEqual
        # The first line has a space before any colon, so it is not a
        # header; no headers at all are expected to be recorded.
        m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg.keys()), 0)

    def test_rfc2822_one_character_header(self):
        eq = self.assertEqual
        m = 'A: first header\nB: second header\nCC: third header\n\nbody'
        msg = email.message_from_string(m)
        # sorted() works for any iterable returned by keys(); an in-place
        # .sort() would require keys() to return a list.
        eq(sorted(msg.keys()), ['A', 'B', 'CC'])
        eq(msg.get_payload(), 'body')

    def test_CRLFLF_at_end_of_part(self):
        # issue 5610: feedparser should not eat two chars from body part ending
        # with "\r\n\n".
        m = (
            "From: foo@bar.com\n"
            "To: baz\n"
            "Mime-Version: 1.0\n"
            "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
            "\n"
            "--BOUNDARY\n"
            "Content-Type: text/plain\n"
            "\n"
            "body ending with CRLF newline\r\n"
            "\n"
            "--BOUNDARY--\n"
            )
        msg = email.message_from_string(m)
        self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002846
Ezio Melottib3aedd42010-11-20 19:04:17 +00002847
R. David Murray96fd54e2010-10-08 15:55:28 +00002848class Test8BitBytesHandling(unittest.TestCase):
2849 # In Python3 all input is string, but that doesn't work if the actual input
2850 # uses an 8bit transfer encoding. To hack around that, in email 5.1 we
2851 # decode byte streams using the surrogateescape error handler, and
2852 # reconvert to binary at appropriate places if we detect surrogates. This
2853 # doesn't allow us to transform headers with 8bit bytes (they get munged),
2854 # but it does allow us to parse and preserve them, and to decode body
2855 # parts that use an 8bit CTE.
2856
2857 bodytest_msg = textwrap.dedent("""\
2858 From: foo@bar.com
2859 To: baz
2860 Mime-Version: 1.0
2861 Content-Type: text/plain; charset={charset}
2862 Content-Transfer-Encoding: {cte}
2863
2864 {bodyline}
2865 """)
2866
2867 def test_known_8bit_CTE(self):
2868 m = self.bodytest_msg.format(charset='utf-8',
2869 cte='8bit',
2870 bodyline='pöstal').encode('utf-8')
2871 msg = email.message_from_bytes(m)
2872 self.assertEqual(msg.get_payload(), "pöstal\n")
2873 self.assertEqual(msg.get_payload(decode=True),
2874 "pöstal\n".encode('utf-8'))
2875
2876 def test_unknown_8bit_CTE(self):
2877 m = self.bodytest_msg.format(charset='notavalidcharset',
2878 cte='8bit',
2879 bodyline='pöstal').encode('utf-8')
2880 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00002881 self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n")
R. David Murray96fd54e2010-10-08 15:55:28 +00002882 self.assertEqual(msg.get_payload(decode=True),
2883 "pöstal\n".encode('utf-8'))
2884
2885 def test_8bit_in_quopri_body(self):
2886 # This is non-RFC compliant data...without 'decode' the library code
2887 # decodes the body using the charset from the headers, and because the
2888 # source byte really is utf-8 this works. This is likely to fail
2889 # against real dirty data (ie: produce mojibake), but the data is
2890 # invalid anyway so it is as good a guess as any. But this means that
2891 # this test just confirms the current behavior; that behavior is not
2892 # necessarily the best possible behavior. With 'decode' it is
2893 # returning the raw bytes, so that test should be of correct behavior,
2894 # or at least produce the same result that email4 did.
2895 m = self.bodytest_msg.format(charset='utf-8',
2896 cte='quoted-printable',
2897 bodyline='p=C3=B6stál').encode('utf-8')
2898 msg = email.message_from_bytes(m)
2899 self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
2900 self.assertEqual(msg.get_payload(decode=True),
2901 'pöstál\n'.encode('utf-8'))
2902
2903 def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
2904 # This is similar to the previous test, but proves that if the 8bit
2905 # byte is undecodeable in the specified charset, it gets replaced
2906 # by the unicode 'unknown' character. Again, this may or may not
2907 # be the ideal behavior. Note that if decode=False none of the
2908 # decoders will get involved, so this is the only test we need
2909 # for this behavior.
2910 m = self.bodytest_msg.format(charset='ascii',
2911 cte='quoted-printable',
2912 bodyline='p=C3=B6stál').encode('utf-8')
2913 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00002914 self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
R. David Murray96fd54e2010-10-08 15:55:28 +00002915 self.assertEqual(msg.get_payload(decode=True),
2916 'pöstál\n'.encode('utf-8'))
2917
2918 def test_8bit_in_base64_body(self):
2919 # Sticking an 8bit byte in a base64 block makes it undecodable by
2920 # normal means, so the block is returned undecoded, but as bytes.
2921 m = self.bodytest_msg.format(charset='utf-8',
2922 cte='base64',
2923 bodyline='cMO2c3RhbAá=').encode('utf-8')
2924 msg = email.message_from_bytes(m)
2925 self.assertEqual(msg.get_payload(decode=True),
2926 'cMO2c3RhbAá=\n'.encode('utf-8'))
2927
2928 def test_8bit_in_uuencode_body(self):
2929 # Sticking an 8bit byte in a uuencode block makes it undecodable by
2930 # normal means, so the block is returned undecoded, but as bytes.
2931 m = self.bodytest_msg.format(charset='utf-8',
2932 cte='uuencode',
2933 bodyline='<,.V<W1A; á ').encode('utf-8')
2934 msg = email.message_from_bytes(m)
2935 self.assertEqual(msg.get_payload(decode=True),
2936 '<,.V<W1A; á \n'.encode('utf-8'))
2937
2938
R. David Murray92532142011-01-07 23:25:30 +00002939 headertest_headers = (
2940 ('From: foo@bar.com', ('From', 'foo@bar.com')),
2941 ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
2942 ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
2943 '\tJean de Baddie',
2944 ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
2945 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
2946 ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
2947 ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
2948 )
2949 headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) +
2950 '\nYes, they are flying.\n').encode('utf-8')
R. David Murray96fd54e2010-10-08 15:55:28 +00002951
2952 def test_get_8bit_header(self):
2953 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00002954 self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz')
2955 self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz')
R. David Murray96fd54e2010-10-08 15:55:28 +00002956
2957 def test_print_8bit_headers(self):
2958 msg = email.message_from_bytes(self.headertest_msg)
2959 self.assertEqual(str(msg),
R. David Murray92532142011-01-07 23:25:30 +00002960 textwrap.dedent("""\
2961 From: {}
2962 To: {}
2963 Subject: {}
2964 From: {}
2965
2966 Yes, they are flying.
2967 """).format(*[expected[1] for (_, expected) in
2968 self.headertest_headers]))
R. David Murray96fd54e2010-10-08 15:55:28 +00002969
2970 def test_values_with_8bit_headers(self):
2971 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00002972 self.assertListEqual([str(x) for x in msg.values()],
R. David Murray96fd54e2010-10-08 15:55:28 +00002973 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00002974 'b\uFFFD\uFFFDz',
2975 'Maintenant je vous pr\uFFFD\uFFFDsente mon '
2976 'coll\uFFFD\uFFFDgue, le pouf '
2977 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
R. David Murray96fd54e2010-10-08 15:55:28 +00002978 '\tJean de Baddie',
R. David Murray92532142011-01-07 23:25:30 +00002979 "g\uFFFD\uFFFDst"])
R. David Murray96fd54e2010-10-08 15:55:28 +00002980
2981 def test_items_with_8bit_headers(self):
2982 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00002983 self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()],
R. David Murray96fd54e2010-10-08 15:55:28 +00002984 [('From', 'foo@bar.com'),
R. David Murray92532142011-01-07 23:25:30 +00002985 ('To', 'b\uFFFD\uFFFDz'),
2986 ('Subject', 'Maintenant je vous '
2987 'pr\uFFFD\uFFFDsente '
2988 'mon coll\uFFFD\uFFFDgue, le pouf '
2989 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
2990 '\tJean de Baddie'),
2991 ('From', 'g\uFFFD\uFFFDst')])
R. David Murray96fd54e2010-10-08 15:55:28 +00002992
2993 def test_get_all_with_8bit_headers(self):
2994 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00002995 self.assertListEqual([str(x) for x in msg.get_all('from')],
R. David Murray96fd54e2010-10-08 15:55:28 +00002996 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00002997 'g\uFFFD\uFFFDst'])
R. David Murray96fd54e2010-10-08 15:55:28 +00002998
R David Murraya2150232011-03-16 21:11:23 -04002999 def test_get_content_type_with_8bit(self):
3000 msg = email.message_from_bytes(textwrap.dedent("""\
3001 Content-Type: text/pl\xA7in; charset=utf-8
3002 """).encode('latin-1'))
3003 self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin")
3004 self.assertEqual(msg.get_content_maintype(), "text")
3005 self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin")
3006
3007 def test_get_params_with_8bit(self):
3008 msg = email.message_from_bytes(
3009 'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1'))
3010 self.assertEqual(msg.get_params(header='x-header'),
3011 [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])
3012 self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne')
3013 # XXX: someday you might be able to get 'b\xa7r', for now you can't.
3014 self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None)
3015
3016 def test_get_rfc2231_params_with_8bit(self):
3017 msg = email.message_from_bytes(textwrap.dedent("""\
3018 Content-Type: text/plain; charset=us-ascii;
3019 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3020 ).encode('latin-1'))
3021 self.assertEqual(msg.get_param('title'),
3022 ('us-ascii', 'en', 'This is not f\uFFFDn'))
3023
3024 def test_set_rfc2231_params_with_8bit(self):
3025 msg = email.message_from_bytes(textwrap.dedent("""\
3026 Content-Type: text/plain; charset=us-ascii;
3027 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3028 ).encode('latin-1'))
3029 msg.set_param('title', 'test')
3030 self.assertEqual(msg.get_param('title'), 'test')
3031
3032 def test_del_rfc2231_params_with_8bit(self):
3033 msg = email.message_from_bytes(textwrap.dedent("""\
3034 Content-Type: text/plain; charset=us-ascii;
3035 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3036 ).encode('latin-1'))
3037 msg.del_param('title')
3038 self.assertEqual(msg.get_param('title'), None)
3039 self.assertEqual(msg.get_content_maintype(), 'text')
3040
3041 def test_get_payload_with_8bit_cte_header(self):
3042 msg = email.message_from_bytes(textwrap.dedent("""\
3043 Content-Transfer-Encoding: b\xa7se64
3044 Content-Type: text/plain; charset=latin-1
3045
3046 payload
3047 """).encode('latin-1'))
3048 self.assertEqual(msg.get_payload(), 'payload\n')
3049 self.assertEqual(msg.get_payload(decode=True), b'payload\n')
3050
R. David Murray96fd54e2010-10-08 15:55:28 +00003051 non_latin_bin_msg = textwrap.dedent("""\
3052 From: foo@bar.com
3053 To: báz
3054 Subject: Maintenant je vous présente mon collègue, le pouf célèbre
3055 \tJean de Baddie
3056 Mime-Version: 1.0
3057 Content-Type: text/plain; charset="utf-8"
3058 Content-Transfer-Encoding: 8bit
3059
3060 Да, они летят.
3061 """).encode('utf-8')
3062
3063 def test_bytes_generator(self):
3064 msg = email.message_from_bytes(self.non_latin_bin_msg)
3065 out = BytesIO()
3066 email.generator.BytesGenerator(out).flatten(msg)
3067 self.assertEqual(out.getvalue(), self.non_latin_bin_msg)
3068
R. David Murray7372a072011-01-26 21:21:32 +00003069 def test_bytes_generator_handles_None_body(self):
3070 #Issue 11019
3071 msg = email.message.Message()
3072 out = BytesIO()
3073 email.generator.BytesGenerator(out).flatten(msg)
3074 self.assertEqual(out.getvalue(), b"\n")
3075
R. David Murray92532142011-01-07 23:25:30 +00003076 non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\
R. David Murray96fd54e2010-10-08 15:55:28 +00003077 From: foo@bar.com
R. David Murray92532142011-01-07 23:25:30 +00003078 To: =?unknown-8bit?q?b=C3=A1z?=
3079 Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=
3080 =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=
3081 =?unknown-8bit?q?_Jean_de_Baddie?=
R. David Murray96fd54e2010-10-08 15:55:28 +00003082 Mime-Version: 1.0
3083 Content-Type: text/plain; charset="utf-8"
3084 Content-Transfer-Encoding: base64
3085
3086 0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
3087 """)
3088
3089 def test_generator_handles_8bit(self):
3090 msg = email.message_from_bytes(self.non_latin_bin_msg)
3091 out = StringIO()
3092 email.generator.Generator(out).flatten(msg)
R. David Murray92532142011-01-07 23:25:30 +00003093 self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped)
R. David Murray96fd54e2010-10-08 15:55:28 +00003094
3095 def test_bytes_generator_with_unix_from(self):
3096 # The unixfrom contains a current date, so we can't check it
3097 # literally. Just make sure the first word is 'From' and the
3098 # rest of the message matches the input.
3099 msg = email.message_from_bytes(self.non_latin_bin_msg)
3100 out = BytesIO()
3101 email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
3102 lines = out.getvalue().split(b'\n')
3103 self.assertEqual(lines[0].split()[0], b'From')
3104 self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)
3105
R. David Murray92532142011-01-07 23:25:30 +00003106 non_latin_bin_msg_as7bit = non_latin_bin_msg_as7bit_wrapped.split('\n')
3107 non_latin_bin_msg_as7bit[2:4] = [
3108 'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
3109 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
3110 non_latin_bin_msg_as7bit = '\n'.join(non_latin_bin_msg_as7bit)
3111
R. David Murray96fd54e2010-10-08 15:55:28 +00003112 def test_message_from_binary_file(self):
3113 fn = 'test.msg'
3114 self.addCleanup(unlink, fn)
3115 with open(fn, 'wb') as testfile:
3116 testfile.write(self.non_latin_bin_msg)
Brett Cannon384917a2010-10-29 23:08:36 +00003117 with open(fn, 'rb') as testfile:
3118 m = email.parser.BytesParser().parse(testfile)
R. David Murray96fd54e2010-10-08 15:55:28 +00003119 self.assertEqual(str(m), self.non_latin_bin_msg_as7bit)
3120
3121 latin_bin_msg = textwrap.dedent("""\
3122 From: foo@bar.com
3123 To: Dinsdale
3124 Subject: Nudge nudge, wink, wink
3125 Mime-Version: 1.0
3126 Content-Type: text/plain; charset="latin-1"
3127 Content-Transfer-Encoding: 8bit
3128
3129 oh là là, know what I mean, know what I mean?
3130 """).encode('latin-1')
3131
3132 latin_bin_msg_as7bit = textwrap.dedent("""\
3133 From: foo@bar.com
3134 To: Dinsdale
3135 Subject: Nudge nudge, wink, wink
3136 Mime-Version: 1.0
3137 Content-Type: text/plain; charset="iso-8859-1"
3138 Content-Transfer-Encoding: quoted-printable
3139
3140 oh l=E0 l=E0, know what I mean, know what I mean?
3141 """)
3142
3143 def test_string_generator_reencodes_to_quopri_when_appropriate(self):
3144 m = email.message_from_bytes(self.latin_bin_msg)
3145 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3146
3147 def test_decoded_generator_emits_unicode_body(self):
3148 m = email.message_from_bytes(self.latin_bin_msg)
3149 out = StringIO()
3150 email.generator.DecodedGenerator(out).flatten(m)
3151 #DecodedHeader output contains an extra blank line compared
3152 #to the input message. RDM: not sure if this is a bug or not,
3153 #but it is not specific to the 8bit->7bit conversion.
3154 self.assertEqual(out.getvalue(),
3155 self.latin_bin_msg.decode('latin-1')+'\n')
3156
3157 def test_bytes_feedparser(self):
3158 bfp = email.feedparser.BytesFeedParser()
3159 for i in range(0, len(self.latin_bin_msg), 10):
3160 bfp.feed(self.latin_bin_msg[i:i+10])
3161 m = bfp.close()
3162 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3163
def test_crlf_flatten(self):
    # msg_26.txt must survive a parse/flatten round trip unchanged when
    # '\r\n' is requested as the line separator.
    with openfile('msg_26.txt', 'rb') as fp:
        original = fp.read()
    parsed = email.message_from_bytes(original)
    sink = BytesIO()
    email.generator.BytesGenerator(sink).flatten(parsed, linesep='\r\n')
    self.assertEqual(sink.getvalue(), original)
# unittest setting: None removes the length limit on failure diffs.
maxDiff = None
3173
Ezio Melottib3aedd42010-11-20 19:04:17 +00003174
class BaseTestBytesGeneratorIdempotent:
    # Mixin for the idempotency tests.  Subclasses supply ``linesep``,
    # ``blinesep`` and ``normalize_linesep_regex``.

    # unittest setting: None removes the length limit on failure diffs.
    maxDiff = None

    def _msgobj(self, filename):
        # Read the sample file, rewrite its line endings to the separator
        # under test, and return the parsed message plus those bytes.
        with openfile(filename, 'rb') as fp:
            raw = fp.read()
        normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(normalized), normalized

    def _idempotent(self, msg, data, unixfrom=False):
        # Flatten msg with header wrapping disabled (maxheaderlen=0) and
        # compare the output with the bytes it was parsed from.
        sink = BytesIO()
        generator = email.generator.BytesGenerator(sink, maxheaderlen=0)
        generator.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertByteStringsEqual(data, sink.getvalue())

    def assertByteStringsEqual(self, str1, str2):
        # Not using self.blinesep here is intentional.  This way the output
        # is more useful when the failure results in mixed line endings.
        first_lines = str1.split(b'\n')
        second_lines = str2.split(b'\n')
        self.assertListEqual(first_lines, second_lines)
3196
3197
# Idempotency tests with '\n' as the generator's line separator.  The regex
# finds CRLF pairs in the sample data, which _msgobj() replaces with blinesep.
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                   TestIdempotent):
    linesep = '\n'
    blinesep = b'\n'
    normalize_linesep_regex = re.compile(br'\r\n')
3203
3204
# Idempotency tests with '\r\n' as the generator's line separator.  The regex
# finds LFs not already preceded by CR, which _msgobj() replaces with blinesep.
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    linesep = '\r\n'
    blinesep = b'\r\n'
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
3210
Ezio Melottib3aedd42010-11-20 19:04:17 +00003211
class TestBase64(unittest.TestCase):
    def test_len(self):
        eq = self.assertEqual
        eq(base64mime.header_length('hello'),
           len(base64mime.body_encode(b'hello', eol='')))
        for size in range(15):
            # Base64 emits four characters for every (possibly partial)
            # group of three input characters.
            bsize = 4 * ((size + 2) // 3)
            eq(base64mime.header_length('x' * size), bsize)

    def test_decode(self):
        eq = self.assertEqual
        eq(base64mime.decode(''), b'')
        eq(base64mime.decode('aGVsbG8='), b'hello')

    def test_encode(self):
        eq = self.assertEqual
        # Empty input encodes to nothing.  Only emptiness is checked: the
        # concrete type of the empty result is incidental.
        eq(len(base64mime.body_encode(b'')), 0)
        eq(base64mime.body_encode(b'hello'), 'aGVsbG8=\n')
        # Test the binary flag
        eq(base64mime.body_encode(b'hello\n'), 'aGVsbG8K\n')
        # Test the maxlinelen arg
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40),
           'eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\n'
           'eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\n'
           'eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\n'
           'eHh4eCB4eHh4IA==\n')
        # Test the eol argument
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           'eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r\n'
           'eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r\n'
           'eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r\n'
           'eHh4eCB4eHh4IA==\r\n')

    def test_header_encode(self):
        eq = self.assertEqual
        he = base64mime.header_encode
        eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        eq(he('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # Test the charset option
        eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003262
3263
Ezio Melottib3aedd42010-11-20 19:04:17 +00003264
class TestQuopri(unittest.TestCase):
    def setUp(self):
        # Set of characters (as byte integers) that don't need to be encoded
        # in headers.
        self.hlit = list(chain(
            range(ord('a'), ord('z') + 1),
            range(ord('A'), ord('Z') + 1),
            range(ord('0'), ord('9') + 1),
            b'!*+-/'))
        # Set of characters (as byte integers) that do need to be encoded in
        # headers.
        self.hnon = [c for c in range(256) if c not in self.hlit]
        # Sanity check on the fixture; a real assertion so that it is not
        # stripped when running under -O.
        self.assertEqual(len(self.hlit) + len(self.hnon), 256)
        # Set of characters (as byte integers) that don't need to be encoded
        # in bodies.
        self.blit = list(range(ord(' '), ord('~') + 1))
        self.blit.append(ord('\t'))
        self.blit.remove(ord('='))
        # Set of characters (as byte integers) that do need to be encoded in
        # bodies.
        self.bnon = [c for c in range(256) if c not in self.blit]
        self.assertEqual(len(self.blit) + len(self.bnon), 256)

    def test_quopri_header_check(self):
        for c in self.hlit:
            self.assertFalse(quoprimime.header_check(c),
                        'Should not be header quopri encoded: %s' % chr(c))
        for c in self.hnon:
            self.assertTrue(quoprimime.header_check(c),
                            'Should be header quopri encoded: %s' % chr(c))

    def test_quopri_body_check(self):
        for c in self.blit:
            self.assertFalse(quoprimime.body_check(c),
                        'Should not be body quopri encoded: %s' % chr(c))
        for c in self.bnon:
            self.assertTrue(quoprimime.body_check(c),
                            'Should be body quopri encoded: %s' % chr(c))

    def test_header_quopri_len(self):
        eq = self.assertEqual
        eq(quoprimime.header_length(b'hello'), 5)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
           quoprimime.header_length(b'hello') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
           quoprimime.header_length(b'h@e@l@l@o@') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        for c in self.hlit:
            eq(quoprimime.header_length(bytes([c])), 1,
               'expected length 1 for %r' % chr(c))
        for c in self.hnon:
            # Space is special; it's encoded to _
            if c == ord(' '):
                continue
            eq(quoprimime.header_length(bytes([c])), 3,
               'expected length 3 for %r' % chr(c))
        eq(quoprimime.header_length(b' '), 1)

    def test_body_quopri_len(self):
        eq = self.assertEqual
        for c in self.blit:
            eq(quoprimime.body_length(bytes([c])), 1)
        for c in self.bnon:
            eq(quoprimime.body_length(bytes([c])), 3)

    def test_quote_unquote_idempotent(self):
        for x in range(256):
            c = chr(x)
            self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)

    def _test_header_encode(self, header, expected_encoded_header, charset=None):
        # charset=None means "use header_encode's own default".
        if charset is None:
            encoded_header = quoprimime.header_encode(header)
        else:
            encoded_header = quoprimime.header_encode(header, charset)
        self.assertEqual(encoded_header, expected_encoded_header)

    def test_header_encode_null(self):
        self._test_header_encode(b'', '')

    def test_header_encode_one_word(self):
        self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=')

    def test_header_encode_two_lines(self):
        self._test_header_encode(b'hello\nworld',
                                '=?iso-8859-1?q?hello=0Aworld?=')

    def test_header_encode_non_ascii(self):
        self._test_header_encode(b'hello\xc7there',
                                '=?iso-8859-1?q?hello=C7there?=')

    def test_header_encode_alt_charset(self):
        self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
                charset='iso-8859-2')

    def _test_header_decode(self, encoded_header, expected_decoded_header):
        decoded_header = quoprimime.header_decode(encoded_header)
        self.assertEqual(decoded_header, expected_decoded_header)

    def test_header_decode_null(self):
        self._test_header_decode('', '')

    def test_header_decode_one_word(self):
        self._test_header_decode('hello', 'hello')

    def test_header_decode_two_lines(self):
        self._test_header_decode('hello=0Aworld', 'hello\nworld')

    def test_header_decode_non_ascii(self):
        self._test_header_decode('hello=C7there', 'hello\xc7there')

    def _test_decode(self, encoded, expected_decoded, eol=None):
        # eol=None means "use decode's own default".
        if eol is None:
            decoded = quoprimime.decode(encoded)
        else:
            decoded = quoprimime.decode(encoded, eol=eol)
        self.assertEqual(decoded, expected_decoded)

    def test_decode_null_word(self):
        self._test_decode('', '')

    def test_decode_null_line_null_word(self):
        self._test_decode('\r\n', '\n')

    def test_decode_one_word(self):
        self._test_decode('hello', 'hello')

    def test_decode_one_word_eol(self):
        self._test_decode('hello', 'hello', eol='X')

    def test_decode_one_line(self):
        self._test_decode('hello\r\n', 'hello\n')

    def test_decode_one_line_lf(self):
        self._test_decode('hello\n', 'hello\n')

    def test_decode_one_line_cr(self):
        self._test_decode('hello\r', 'hello\n')

    def test_decode_one_line_nl(self):
        self._test_decode('hello\n', 'helloX', eol='X')

    def test_decode_one_line_crnl(self):
        self._test_decode('hello\r\n', 'helloX', eol='X')

    def test_decode_one_line_one_word(self):
        self._test_decode('hello\r\nworld', 'hello\nworld')

    def test_decode_one_line_one_word_eol(self):
        self._test_decode('hello\r\nworld', 'helloXworld', eol='X')

    def test_decode_two_lines(self):
        self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n')

    def test_decode_two_lines_eol(self):
        self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X')

    def test_decode_one_long_line(self):
        self._test_decode('Spam' * 250, 'Spam' * 250)

    def test_decode_one_space(self):
        self._test_decode(' ', '')

    def test_decode_multiple_spaces(self):
        self._test_decode(' ' * 5, '')

    def test_decode_one_line_trailing_spaces(self):
        self._test_decode('hello \r\n', 'hello\n')

    def test_decode_two_lines_trailing_spaces(self):
        self._test_decode('hello \r\nworld \r\n', 'hello\nworld\n')

    def test_decode_quoted_word(self):
        self._test_decode('=22quoted=20words=22', '"quoted words"')

    def test_decode_uppercase_quoting(self):
        self._test_decode('ab=CD=EF', 'ab\xcd\xef')

    def test_decode_lowercase_quoting(self):
        self._test_decode('ab=cd=ef', 'ab\xcd\xef')

    def test_decode_soft_line_break(self):
        self._test_decode('soft line=\r\nbreak', 'soft linebreak')

    def test_decode_false_quoting(self):
        self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2')

    def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
        # None for maxlinelen/eol means "do not pass the argument"; the local
        # value is then set to body_encode's default so that the line-length
        # check below still knows what limit applies.
        kwargs = {}
        if maxlinelen is None:
            # Use body_encode's default.
            maxlinelen = 76
        else:
            kwargs['maxlinelen'] = maxlinelen
        if eol is None:
            # Use body_encode's default.
            eol = '\n'
        else:
            kwargs['eol'] = eol
        encoded_body = quoprimime.body_encode(body, **kwargs)
        self.assertEqual(encoded_body, expected_encoded_body)
        if eol == '\n' or eol == '\r\n':
            # We know how to split the result back into lines, so maxlinelen
            # can be checked.
            for line in encoded_body.splitlines():
                self.assertLessEqual(len(line), maxlinelen)

    def test_encode_null(self):
        self._test_encode('', '')

    def test_encode_null_lines(self):
        self._test_encode('\n\n', '\n\n')

    def test_encode_one_line(self):
        self._test_encode('hello\n', 'hello\n')

    def test_encode_one_line_crlf(self):
        self._test_encode('hello\r\n', 'hello\n')

    def test_encode_one_line_eol(self):
        self._test_encode('hello\n', 'hello\r\n', eol='\r\n')

    def test_encode_one_space(self):
        self._test_encode(' ', '=20')

    def test_encode_one_line_one_space(self):
        self._test_encode(' \n', '=20\n')

    # XXX: body_encode() expects strings, but uses ord(char) from these
    # strings to index into a 256-entry list.  For code points above 255,
    # this will fail.  Should there be a check for 8-bit only ord() values in
    # body, or at least a comment about the expected input?

    def test_encode_two_lines_one_space(self):
        self._test_encode(' \n \n', '=20\n=20\n')

    def test_encode_one_word_trailing_spaces(self):
        # Only the last of the trailing spaces is encoded.
        self._test_encode('hello  ', 'hello =20')

    def test_encode_one_line_trailing_spaces(self):
        # Only the last of the trailing spaces is encoded.
        self._test_encode('hello  \n', 'hello =20\n')

    def test_encode_one_word_trailing_tab(self):
        self._test_encode('hello \t', 'hello =09')

    def test_encode_one_line_trailing_tab(self):
        self._test_encode('hello \t\n', 'hello =09\n')

    def test_encode_trailing_space_before_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6)

    def test_encode_trailing_space_at_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5)

    def test_encode_trailing_space_beyond_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4)

    def test_encode_whitespace_lines(self):
        self._test_encode(' \n' * 5, '=20\n' * 5)

    def test_encode_quoted_equals(self):
        self._test_encode('a = b', 'a =3D b')

    def test_encode_one_long_string(self):
        self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25)

    def test_encode_one_long_line(self):
        self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')

    def test_encode_one_very_long_line(self):
        self._test_encode('x' * 200 + '\n',
                2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n')

    def test_encode_shortest_maxlinelen(self):
        self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4)

    def test_encode_maxlinelen_too_small(self):
        self.assertRaises(ValueError, self._test_encode, '', '', maxlinelen=3)

    def test_encode(self):
        eq = self.assertEqual
        eq(quoprimime.body_encode(''), '')
        eq(quoprimime.body_encode('hello'), 'hello')
        # Test the binary flag
        eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
        # Test the maxlinelen arg.  The second output line starts with the
        # space that follows the soft break.
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40),
           'xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\n'
           ' xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\n'
           'x xxxx xxxx xxxx xxxx=20')
        # Test the eol argument
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           'xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r\n'
           ' xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r\n'
           'x xxxx xxxx xxxx xxxx=20')
        text = 'one line\n\ntwo line'
        eq(quoprimime.body_encode(text), text)
3577
3578
Ezio Melottib3aedd42010-11-20 19:04:17 +00003579
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003580# Test the Charset class
class TestCharset(unittest.TestCase):
    def tearDown(self):
        # Remove the 'fake' charset registered by test_body_encode, if any.
        from email import charset as CharsetModule
        CharsetModule.CHARSETS.pop('fake', None)

    def test_codec_encodeable(self):
        eq = self.assertEqual
        # Make sure us-ascii = no Unicode conversion
        ascii_charset = Charset('us-ascii')
        eq(ascii_charset.header_encode('Hello World!'), 'Hello World!')
        # Test 8-bit idempotency with us-ascii
        s = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
        self.assertRaises(UnicodeError, ascii_charset.header_encode, s)
        utf8_charset = Charset('utf-8')
        eq(utf8_charset.header_encode(s),
           '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')

    def test_body_encode(self):
        eq = self.assertEqual
        # Try a charset with QP body encoding
        eq('hello w=F6rld', Charset('iso-8859-1').body_encode('hello w\xf6rld'))
        # Try a charset with Base64 body encoding
        eq('aGVsbG8gd29ybGQ=\n', Charset('utf-8').body_encode(b'hello world'))
        # Try a charset with None body encoding
        eq('hello world', Charset('us-ascii').body_encode('hello world'))
        # Try the convert argument, where input codec != output codec
        c = Charset('euc-jp')
        # With apologies to Tokio Kikuchi ;)
        # XXX FIXME
##         try:
##             eq('\x1b$B5FCO;~IW\x1b(B',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
##             eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
##         except LookupError:
##             # We probably don't have the Japanese codecs installed
##             pass
        # Testing SF bug #625509, which we have to fake, since there are no
        # built-in encodings where the header encoding is QP but the body
        # encoding is not.
        from email import charset as CharsetModule
        CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8')
        fake = Charset('fake')
        eq('hello world', fake.body_encode('hello world'))

    def test_unicode_charset_name(self):
        name = 'us-ascii'
        self.assertEqual(str(Charset(name)), name)
        self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
3635
3636
Ezio Melottib3aedd42010-11-20 19:04:17 +00003637
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003638# Test multilingual MIME headers.
3639class TestHeader(TestEmailBase):
3640 def test_simple(self):
3641 eq = self.ndiffAssertEqual
3642 h = Header('Hello World!')
3643 eq(h.encode(), 'Hello World!')
3644 h.append(' Goodbye World!')
3645 eq(h.encode(), 'Hello World! Goodbye World!')
3646
3647 def test_simple_surprise(self):
3648 eq = self.ndiffAssertEqual
3649 h = Header('Hello World!')
3650 eq(h.encode(), 'Hello World!')
3651 h.append('Goodbye World!')
3652 eq(h.encode(), 'Hello World! Goodbye World!')
3653
3654 def test_header_needs_no_decoding(self):
3655 h = 'no decoding needed'
3656 self.assertEqual(decode_header(h), [(h, None)])
3657
3658 def test_long(self):
3659 h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
3660 maxlinelen=76)
3661 for l in h.encode(splitchars=' ').split('\n '):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003662 self.assertTrue(len(l) <= 76)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003663
def test_multilingual(self):
    # Build one header from three chunks in different charsets, then check
    # its encoded form, what decode_header() returns for it, its str()
    # value, and that make_header() rebuilds an equal header.
    eq = self.ndiffAssertEqual
    g = Charset("iso-8859-1")
    cz = Charset("iso-8859-2")
    utf8 = Charset("utf-8")
    g_head = (b'Die Mieter treten hier ein werden mit einem '
              b'Foerderband komfortabel den Korridor entlang, '
              b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
              b'gegen die rotierenden Klingen bef\xf6rdert. ')
    cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
               b'd\xf9vtipu.. ')
    utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                 '\u3044\u307e\u3059\u3002')
    h = Header(g_head, g)
    h.append(cz_head, cz)
    h.append(utf8_head, utf8)
    enc = h.encode(maxlinelen=76)
    # Continuation lines of the expected value start with a single space.
    eq(enc, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
    # decode_header() is expected to yield one (bytes, charset) pair per
    # original chunk.
    decoded = decode_header(enc)
    eq(len(decoded), 3)
    eq(decoded[0], (g_head, 'iso-8859-1'))
    eq(decoded[1], (cz_head, 'iso-8859-2'))
    eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
    ustr = str(h)
    # The expected str() value is spelled as UTF-8 bytes and decoded here.
    eq(ustr,
       (b'Die Mieter treten hier ein werden mit einem Foerderband '
        b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
        b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
        b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
        b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
        b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
        b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
        b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
        b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
        b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
        b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
        b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
        b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
        b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
        b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
        b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
        ).decode('utf-8'))
    # Test make_header()
    newh = make_header(decode_header(enc))
    eq(newh, h)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003726
3727 def test_empty_header_encode(self):
3728 h = Header()
3729 self.assertEqual(h.encode(), '')
Barry Warsaw8b3d6592007-08-30 02:10:49 +00003730
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003731 def test_header_ctor_default_args(self):
3732 eq = self.ndiffAssertEqual
3733 h = Header()
3734 eq(h, '')
3735 h.append('foo', Charset('iso-8859-1'))
Guido van Rossum9604e662007-08-30 03:46:43 +00003736 eq(h, 'foo')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003737
3738 def test_explicit_maxlinelen(self):
3739 eq = self.ndiffAssertEqual
3740 hstr = ('A very long line that must get split to something other '
3741 'than at the 76th character boundary to test the non-default '
3742 'behavior')
3743 h = Header(hstr)
3744 eq(h.encode(), '''\
3745A very long line that must get split to something other than at the 76th
3746 character boundary to test the non-default behavior''')
3747 eq(str(h), hstr)
3748 h = Header(hstr, header_name='Subject')
3749 eq(h.encode(), '''\
3750A very long line that must get split to something other than at the
3751 76th character boundary to test the non-default behavior''')
3752 eq(str(h), hstr)
3753 h = Header(hstr, maxlinelen=1024, header_name='Subject')
3754 eq(h.encode(), hstr)
3755 eq(str(h), hstr)
3756
def test_quopri_splittable(self):
    # Encode 'xxxx ' * 20 as iso-8859-1 with two small maxlinelen values and
    # check both the exact encoded lines and that decoding restores the text.
    eq = self.ndiffAssertEqual
    h = Header(charset='iso-8859-1', maxlinelen=20)
    x = 'xxxx ' * 20
    h.append(x)
    s = h.encode()
    # Continuation lines of the expected value start with a single space.
    eq(s, """\
=?iso-8859-1?q?xxx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_?=""")
    # Round trip: decoding the encoded form must give back the input text.
    eq(x, str(make_header(decode_header(s))))
    h = Header(charset='iso-8859-1', maxlinelen=40)
    h.append('xxxx ' * 20)
    s = h.encode()
    eq(s, """\
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
 =?iso-8859-1?q?_xxxx_xxxx_?=""")
    eq(x, str(make_header(decode_header(s))))
3825
def test_base64_splittable(self):
    # Encode 'xxxx ' * 20 as koi8-r with two small maxlinelen values and
    # check both the exact encoded lines and that decoding restores the text.
    eq = self.ndiffAssertEqual
    h = Header(charset='koi8-r', maxlinelen=20)
    x = 'xxxx ' * 20
    h.append(x)
    s = h.encode()
    # Continuation lines of the expected value start with a single space.
    eq(s, """\
=?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IA==?=""")
    # Round trip: decoding the encoded form must give back the input text.
    eq(x, str(make_header(decode_header(s))))
    h = Header(charset='koi8-r', maxlinelen=40)
    h.append(x)
    s = h.encode()
    eq(s, """\
=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
    eq(x, str(make_header(decode_header(s))))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003879
3880 def test_us_ascii_header(self):
3881 eq = self.assertEqual
3882 s = 'hello'
3883 x = decode_header(s)
3884 eq(x, [('hello', None)])
3885 h = make_header(x)
3886 eq(s, h.encode())
3887
3888 def test_string_charset(self):
3889 eq = self.assertEqual
3890 h = Header()
3891 h.append('hello', 'iso-8859-1')
Guido van Rossum9604e662007-08-30 03:46:43 +00003892 eq(h, 'hello')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003893
##    def test_unicode_error(self):
##        raises = self.assertRaises
##        raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
##        h = Header()
##        raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')
3902
3903 def test_utf8_shortest(self):
3904 eq = self.assertEqual
3905 h = Header('p\xf6stal', 'utf-8')
3906 eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
3907 h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
3908 eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')
3909
3910 def test_bad_8bit_header(self):
3911 raises = self.assertRaises
3912 eq = self.assertEqual
3913 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
3914 raises(UnicodeError, Header, x)
3915 h = Header()
3916 raises(UnicodeError, h.append, x)
3917 e = x.decode('utf-8', 'replace')
3918 eq(str(Header(x, errors='replace')), e)
3919 h.append(x, errors='replace')
3920 eq(str(h), e)
3921
R David Murray041015c2011-03-25 15:10:55 -04003922 def test_escaped_8bit_header(self):
3923 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
3924 x = x.decode('ascii', 'surrogateescape')
3925 h = Header(x, charset=email.charset.UNKNOWN8BIT)
3926 self.assertEqual(str(h),
3927 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
3928 self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])
3929
3930 def test_modify_returned_list_does_not_change_header(self):
3931 h = Header('test')
3932 chunks = email.header.decode_header(h)
3933 chunks.append(('ascii', 'test2'))
3934 self.assertEqual(str(h), 'test')
3935
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003936 def test_encoded_adjacent_nonencoded(self):
3937 eq = self.assertEqual
3938 h = Header()
3939 h.append('hello', 'iso-8859-1')
3940 h.append('world')
3941 s = h.encode()
3942 eq(s, '=?iso-8859-1?q?hello?= world')
3943 h = make_header(decode_header(s))
3944 eq(h.encode(), s)
3945
3946 def test_whitespace_eater(self):
3947 eq = self.assertEqual
3948 s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
3949 parts = decode_header(s)
3950 eq(parts, [(b'Subject:', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b'zz.', None)])
3951 hdr = make_header(parts)
3952 eq(hdr.encode(),
3953 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')
3954
3955 def test_broken_base64_header(self):
3956 raises = self.assertRaises
R. David Murrayc4e69cc2010-08-03 22:14:10 +00003957 s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003958 raises(errors.HeaderParseError, decode_header, s)
3959
R. David Murray477efb32011-01-05 01:39:32 +00003960 def test_shift_jis_charset(self):
3961 h = Header('文', charset='shift_jis')
3962 self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')
3963
R David Murrayde912762011-03-16 18:26:23 -04003964 def test_flatten_header_with_no_value(self):
3965 # Issue 11401 (regression from email 4.x) Note that the space after
3966 # the header doesn't reflect the input, but this is also the way
3967 # email 4.x behaved. At some point it would be nice to fix that.
3968 msg = email.message_from_string("EmptyHeader:")
3969 self.assertEqual(str(msg), "EmptyHeader: \n\n")
3970
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003971
Ezio Melottib3aedd42010-11-20 19:04:17 +00003972
# Test RFC 2231 header parameters (en/de)coding
class TestRFC2231(TestEmailBase):
    # Tests for RFC 2231 style header parameters: reading them with
    # get_param()/get_filename()/get_boundary()/get_content_charset() and
    # writing them with set_param()/del_param().

    def test_get_param(self):
        # An extended parameter comes back as (charset, language, value);
        # unquote=False keeps the surrounding double quotes on the value.
        eq = self.assertEqual
        msg = self._msgobj('msg_29.txt')
        eq(msg.get_param('title'),
           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        eq(msg.get_param('title', unquote=False),
           ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))

    def test_set_param(self):
        eq = self.ndiffAssertEqual
        msg = Message()
        # Charset only: the language slot of the tuple is the empty string.
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii')
        eq(msg.get_param('title'),
           ('us-ascii', '', 'This is even more ***fun*** isn\'t it!'))
        # Charset and language.
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        eq(msg.get_param('title'),
           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        # Setting the parameter on a parsed message and flattening it.
        # NOTE(review): the expected text must match what msg_01.txt
        # flattens to byte for byte, including any runs of spaces -- verify
        # the literal against the data file.
        msg = self._msgobj('msg_01.txt')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    def test_del_param(self):
        # Add two extended parameters, delete one of them again, and check
        # the flattened message only carries the remaining one.
        # NOTE(review): the expected text must match what msg_01.txt
        # flattens to byte for byte, including any runs of spaces -- verify
        # the literal against the data file.
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        msg.set_param('foo', 'bar', charset='us-ascii', language='en')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        msg.del_param('foo', header='Content-Type')
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    def test_rfc2231_get_content_charset(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_32.txt')
        eq(msg.get_content_charset(), 'us-ascii')

    def test_rfc2231_parse_rfc_quoting(self):
        # Encoded segments written without surrounding double quotes; the
        # message must also flatten back to exactly the input text.
        m = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*=''This%20is%20even%20more%20;
            \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        self.assertEqual(m, msg.as_string())

    def test_rfc2231_parse_extra_quoting(self):
        # Same as above, but the encoded segments are wrapped in double
        # quotes; the flattened output must still equal the input.
        m = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*="''This%20is%20even%20more%20";
            \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        self.assertEqual(m, msg.as_string())

    def test_rfc2231_no_language_or_charset(self):
        # Continuations without any encoded segment yield a plain string,
        # not a (charset, language, value) tuple.
        m = '''\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm

'''
        msg = email.message_from_string(m)
        param = msg.get_param('NAME')
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(
            param,
            'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')

    def test_rfc2231_no_language_or_charset_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
        # NOTE(review): the input and expectation here are identical to
        # test_rfc2231_no_language_or_charset_in_filename -- confirm whether
        # a different input was intended.
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_partly_encoded(self):
        # Segment 0 is not marked as encoded (no trailing '*'), so its
        # %XX sequences are expected to stay as they are.
        m = '''\
Content-Disposition: inline;
\tfilename*0="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20***fun*** is it not.pdf')

    def test_rfc2231_partly_nonencoded(self):
        # No segment is marked as encoded, so every %XX sequence is
        # expected to stay as it is.
        m = '''\
Content-Disposition: inline;
\tfilename*0="This%20is%20even%20more%20";
\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_boundary(self):
        m = '''\
Content-Type: multipart/alternative;
\tboundary*0*="''This%20is%20even%20more%20";
\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tboundary*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_boundary(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_charset(self):
        # This is a nonsensical charset value, but tests the code anyway
        m = '''\
Content-Type: text/plain;
\tcharset*0*="This%20is%20even%20more%20";
\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tcharset*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_content_charset(),
                         'this is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_filename(self):
        # The charset named in segment 0 is 'bogus'; the filename is still
        # expected to decode.
        m = '''\
Content-Disposition: inline;
\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_charset(self):
        m = """\
Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D

"""
        msg = email.message_from_string(m)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertIsNone(msg.get_content_charset())

    def test_rfc2231_bad_character_in_charset(self):
        m = """\
Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D

"""
        msg = email.message_from_string(m)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertIsNone(msg.get_content_charset())

    def test_rfc2231_bad_character_in_filename(self):
        # The trailing %E2 is not valid in the declared 'ascii' charset and
        # is expected to come out as U+FFFD.
        m = '''\
Content-Disposition: inline;
\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2*="is it not.pdf%E2"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf\ufffd')

    def test_rfc2231_unknown_encoding(self):
        m = """\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt

"""
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(), 'myfile.txt')

    def test_rfc2231_single_tick_in_filename_extended(self):
        # A lone apostrophe in an encoded segment must not be taken for the
        # charset'language'value delimiters.
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, None)
        eq(language, None)
        eq(s, "Frank's Document")

    def test_rfc2231_single_tick_in_filename(self):
        m = """\
Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(m)
        param = msg.get_param('name')
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(param, "Frank's Document")

    def test_rfc2231_tick_attack_extended(self):
        # Three apostrophes in an encoded segment: the first two delimit
        # charset and language, the third belongs to the value.
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, "Frank's Document")

    def test_rfc2231_tick_attack(self):
        # Same text in non-encoded segments: the apostrophes are plain data
        # and the result is a plain string.
        m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(m)
        param = msg.get_param('name')
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(param, "us-ascii'en-us'Frank's Document")

    def test_rfc2231_no_extended_values(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo; name=\"Frank's Document\"

"""
        msg = email.message_from_string(m)
        eq(msg.get_param('name'), "Frank's Document")

    def test_rfc2231_encoded_then_unencoded_segments(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'My\";
\tname*1=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, 'My Document For You')

    def test_rfc2231_unencoded_then_encoded_segments(self):
        # Segment 0 is not marked as encoded, yet charset and language are
        # still expected to be taken from it.
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'My\";
\tname*1*=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, 'My Document For You')
4308
4309
Ezio Melottib3aedd42010-11-20 19:04:17 +00004310
# Tests to ensure that signed parts of an email are completely preserved, as
# required by RFC1847 section 2.1.  Note that these are incomplete, because the
# email package does not currently always preserve the body.  See issue 1670765.
class TestSigned(TestEmailBase):

    # Finds the first MIME part of a message: group 1 is the text following
    # '--' on the opening boundary line, group 2 is everything up to the
    # next line that consists of '--' plus that same text.  Compiled once
    # for the class instead of on every comparison.
    _first_part_re = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)

    def _msg_and_obj(self, filename):
        # Return (raw text of the data file, message parsed from that text).
        with openfile(filename) as fp:
            original = fp.read()
        msg = email.message_from_string(original)
        return original, msg

    def _first_part(self, text):
        # Return the body of the first MIME part of `text`; fail the test
        # with a readable message when no such part can be found, rather
        # than with an AttributeError on None.
        match = self._first_part_re.search(text)
        self.assertIsNotNone(match, 'no boundary-delimited MIME part found')
        return match.group(2)

    def _signed_parts_eq(self, original, result):
        # Extract the first mime part of each message and require the
        # regenerated one to equal the original one.
        inpart = self._first_part(original)
        outpart = self._first_part(result)
        self.assertEqual(outpart, inpart)

    def test_long_headers_as_string(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)
4346
4347
Ezio Melottib3aedd42010-11-20 19:04:17 +00004348
# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()