blob: 404282b0ba6c48bbb9be106001d5034db4f7582f [file] [log] [blame]
# Copyright (C) 2001-2010 Python Software Foundation
# Contact: email-sig@python.org
# email package unit tests
4
5import os
R. David Murray719a4492010-11-21 16:53:48 +00006import re
Guido van Rossum8b3febe2007-08-30 01:15:14 +00007import sys
8import time
9import base64
10import difflib
11import unittest
12import warnings
R. David Murray96fd54e2010-10-08 15:55:28 +000013import textwrap
Guido van Rossum8b3febe2007-08-30 01:15:14 +000014
R. David Murray96fd54e2010-10-08 15:55:28 +000015from io import StringIO, BytesIO
Guido van Rossum8b3febe2007-08-30 01:15:14 +000016from itertools import chain
17
18import email
19
20from email.charset import Charset
21from email.header import Header, decode_header, make_header
22from email.parser import Parser, HeaderParser
23from email.generator import Generator, DecodedGenerator
24from email.message import Message
25from email.mime.application import MIMEApplication
26from email.mime.audio import MIMEAudio
27from email.mime.text import MIMEText
28from email.mime.image import MIMEImage
29from email.mime.base import MIMEBase
30from email.mime.message import MIMEMessage
31from email.mime.multipart import MIMEMultipart
32from email import utils
33from email import errors
34from email import encoders
35from email import iterators
36from email import base64mime
37from email import quoprimime
38
R David Murray28346b82011-03-31 11:40:20 -040039from test.support import run_unittest, unlink
R David Murraya256bac2011-03-31 12:20:23 -040040from test.test_email import openfile, TestEmailBase
Guido van Rossum8b3febe2007-08-30 01:15:14 +000041
# String constants shared by the tests in this module.
NL = '\n'
EMPTYSTRING = ''
SPACE = ' '
45
46
# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    def test_get_all(self):
        # get_all() lists every value stored under a header name and falls
        # back to the supplied default when the header is absent.
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        # set_charset() fills in the MIME headers a bare message lacks, but
        # must not overwrite MIME headers that are already present.
        eq = self.assertEqual
        msg = Message()
        eq(msg.get_charset(), None)
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        eq(msg.get_charset(), None)
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        # set_charset() also accepts the charset name as a plain string.
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        # A charset handed to set_payload() is retrievable via get_charset().
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_get_charsets(self):
        # get_charsets() reports one entry per part; a part without a
        # charset shows up as the fallback value (None unless overridden).
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        # get_filename() on a subpart returns that attachment's file name.
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        # NOTE(review): presumably msg_44.txt supplies the file name only
        # through a 'name' parameter -- confirm against the fixture.
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_make_boundary(self):
        msg = MIMEMultipart('form-data')
        # Note that when the boundary gets created is an implementation
        # detail and might change.
        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
        # Trigger creation of boundary
        msg.as_string()
        self.assertEqual(msg.items()[0][1][:33],
                         'multipart/form-data; boundary="==')
        # XXX: there ought to be tests of the uniqueness of the boundary, too.

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        with openfile('msg_46.txt') as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, mangle_from_=True, maxheaderlen=0)
        gen.flatten(msg, unixfrom=False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        eq(msg.get_payload(decode=True), None)
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        # Every accepted spelling of the uuencode transfer encoding must
        # decode the payload.
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            # Item assignment appends a header instead of replacing it and
            # lookups return the first match, so drop the previous value
            # first; otherwise only 'x-uuencode' would ever be exercised.
            del msg['content-transfer-encoding']
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_decoded_generator(self):
        # Flattening msg_07.txt through DecodedGenerator must reproduce the
        # text stored in msg_17.txt.
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        self.assertIn('from', msg)
        self.assertIn('From', msg)
        self.assertIn('FROM', msg)
        self.assertIn('to', msg)
        self.assertIn('To', msg)
        self.assertIn('TO', msg)

    def test_as_string(self):
        # str(msg) round-trips the source text; unixfrom=True only adds a
        # leading 'From ' envelope line in front of it.
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        eq(text, str(msg))
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        self.assertTrue(lines[0].startswith('From '))
        eq(text, NL.join(lines[1:]))

    def test_bad_param(self):
        # A parameter that has no value is reported as the empty string.
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        # Without a Content-Disposition header get_filename() returns None.
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_filename(), None)

    def test_bogus_filename(self):
        # A valueless 'filename' parameter yields '' rather than None.
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        # Without a Content-Type header get_boundary() returns None.
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_boundary(), None)

    def test_get_params(self):
        # get_params() splits any header into (name, value) pairs, unquoting
        # values, and returns None when the requested header is missing.
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        eq(msg.get_params(), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    def test_get_param_liberal(self):
        # Whitespace around '=' (and a stray 'Content-Type:' prefix inside
        # the value) must not stop the parameter from being found.
        msg = Message()
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        # get_param() looks up one parameter, on Content-Type by default or
        # on the named header, and returns None when it is not there.
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        eq(msg.get_param('quuz', header='x-header'), None)
        eq(msg.get_param('quuz'), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    def test_get_param_funky_continuation_lines(self):
        # NOTE(review): relies on the header folding used in msg_22.txt,
        # which is not visible from this file.
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    def test_get_param_with_semis_in_quotes(self):
        # Semicolons inside a quoted value are part of the value, not
        # parameter separators.
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    def test_get_param_with_quotes(self):
        # The bar*0/bar*1 segments are joined and their escaped quotes are
        # unescaped.  Both literals below spell the same header text.
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        # 'in' ignores the case of the header name but still requires the
        # whole name to match.
        msg = email.message_from_string('Header: exists')
        self.assertIn('header', msg)
        self.assertIn('Header', msg)
        self.assertIn('HEADER', msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        # set_param() creates the header when needed, quotes the stored
        # values, and can target a header other than Content-Type.
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        # Deleting a parameter and setting it again moves it to the end of
        # the parameter list.
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        # del_param() can operate on a header other than Content-Type.
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_set_type(self):
        # set_type() rejects a value without a subtype and keeps existing
        # parameters when the type is changed.
        eq = self.assertEqual
        msg = Message()
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        # set_type() can rewrite a header other than Content-Type.
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    # get_content_type(): missing header, overridden default type, and
    # messages loaded from fixtures.

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    # get_content_maintype(): same scenarios as above.

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    # get_content_subtype(): same scenarios as above.

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        # A Content-Type value without a slash falls back to 'text'.
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        # A Content-Type value without a slash falls back to 'plain'.
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        # replace_header() rewrites only the first matching header, keeps
        # its position, and raises KeyError when nothing matches.
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    def test_broken_base64_payload(self):
        # A malformed base64 payload is expected back undecoded, as bytes,
        # instead of raising.
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    # Issue 1078919
    def test_ascii_add_header(self):
        # An ASCII parameter value is emitted as a plain quoted string.
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    def test_noascii_add_header(self):
        # A non-ASCII parameter value is emitted in the extended
        # "name*=charset''percent-encoded" form, using utf-8.
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer.ppt")
        self.assertEqual(
            'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
            msg['Content-Disposition'])

    def test_nonascii_add_header_via_triple(self):
        # A (charset, language, value) triple selects the charset used for
        # the extended parameter form.
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fußballer.ppt'))
        self.assertEqual(
            'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
            msg['Content-Disposition'])

    def test_ascii_add_header_with_tspecial(self):
        # ASCII values containing spaces and brackets stay in quoted form.
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="windows [filename].ppt")
        self.assertEqual(
            'attachment; filename="windows [filename].ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_with_tspecial(self):
        # In the extended form, spaces and brackets are percent-encoded too.
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer [filename].ppt")
        self.assertEqual(
            "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
            msg['Content-Disposition'])

    # Issue 5871: reject an attempt to embed a header inside a header value
    # (header injection attack).
    def test_embeded_header_via_Header_rejected(self):
        msg = Message()
        msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_embeded_header_via_string_rejected(self):
        msg = Message()
        msg['Dummy'] = 'dummy\nX-Injected-Header: test'
        self.assertRaises(errors.HeaderParseError, msg.as_string)
549
# Test the email.encoders module
class TestEncoders(unittest.TestCase):

    def test_EncodersEncode_base64(self):
        # Build a MIMEImage from raw GIF bytes and inspect the encoded
        # payload it produces.
        with openfile('PyBanner048.gif', 'rb') as fp:
            bindata = fp.read()
        mimed = MIMEImage(bindata)
        base64ed = mimed.get_payload()
        # the transfer-encoded body lines should all be <=76 characters
        lines = base64ed.split('\n')
        self.assertLessEqual(max(len(line) for line in lines), 76)

    def test_encode_empty_payload(self):
        # Setting a us-ascii charset on a message without a payload still
        # selects a 7bit transfer encoding.
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg['content-transfer-encoding'], '7bit')

    def test_default_cte(self):
        eq = self.assertEqual
        # 7bit data and the default us-ascii _charset
        msg = MIMEText('hello world')
        eq(msg['content-transfer-encoding'], '7bit')
        # Similar, but with 8bit data
        msg = MIMEText('hello \xf8 world')
        eq(msg['content-transfer-encoding'], '8bit')
        # And now with a different charset
        msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')

    def test_encode7or8bit(self):
        # Make sure a charset whose input character set is 8bit but
        # whose output character set is 7bit gets a transfer-encoding
        # of 7bit.
        eq = self.assertEqual
        msg = MIMEText('文', _charset='euc-jp')
        eq(msg['content-transfer-encoding'], '7bit')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000587
Ezio Melottib3aedd42010-11-20 19:04:17 +0000588
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000589# Test long header wrapping
590class TestLongHeaders(TestEmailBase):
591 def test_split_long_continuation(self):
592 eq = self.ndiffAssertEqual
593 msg = email.message_from_string("""\
594Subject: bug demonstration
595\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
596\tmore text
597
598test
599""")
600 sfp = StringIO()
601 g = Generator(sfp)
602 g.flatten(msg)
603 eq(sfp.getvalue(), """\
604Subject: bug demonstration
605\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
606\tmore text
607
608test
609""")
610
611 def test_another_long_almost_unsplittable_header(self):
612 eq = self.ndiffAssertEqual
613 hstr = """\
614bug demonstration
615\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
616\tmore text"""
617 h = Header(hstr, continuation_ws='\t')
618 eq(h.encode(), """\
619bug demonstration
620\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
621\tmore text""")
622 h = Header(hstr.replace('\t', ' '))
623 eq(h.encode(), """\
624bug demonstration
625 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
626 more text""")
627
628 def test_long_nonstring(self):
629 eq = self.ndiffAssertEqual
630 g = Charset("iso-8859-1")
631 cz = Charset("iso-8859-2")
632 utf8 = Charset("utf-8")
633 g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
634 b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
635 b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
636 b'bef\xf6rdert. ')
637 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
638 b'd\xf9vtipu.. ')
639 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
640 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
641 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
642 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
643 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
644 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
645 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
646 '\u3044\u307e\u3059\u3002')
647 h = Header(g_head, g, header_name='Subject')
648 h.append(cz_head, cz)
649 h.append(utf8_head, utf8)
650 msg = Message()
651 msg['Subject'] = h
652 sfp = StringIO()
653 g = Generator(sfp)
654 g.flatten(msg)
655 eq(sfp.getvalue(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000656Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
657 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
658 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
659 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
660 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
661 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
662 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
663 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
664 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
665 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
666 =?utf-8?b?44CC?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000667
668""")
Guido van Rossum9604e662007-08-30 03:46:43 +0000669 eq(h.encode(maxlinelen=76), """\
670=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
671 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
672 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
673 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
674 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
675 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
676 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
677 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
678 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
679 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
680 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000681
682 def test_long_header_encode(self):
683 eq = self.ndiffAssertEqual
684 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
685 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
686 header_name='X-Foobar-Spoink-Defrobnit')
687 eq(h.encode(), '''\
688wasnipoop; giraffes="very-long-necked-animals";
689 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
690
691 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
692 eq = self.ndiffAssertEqual
693 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
694 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
695 header_name='X-Foobar-Spoink-Defrobnit',
696 continuation_ws='\t')
697 eq(h.encode(), '''\
698wasnipoop; giraffes="very-long-necked-animals";
699 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
700
701 def test_long_header_encode_with_tab_continuation(self):
702 eq = self.ndiffAssertEqual
703 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
704 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
705 header_name='X-Foobar-Spoink-Defrobnit',
706 continuation_ws='\t')
707 eq(h.encode(), '''\
708wasnipoop; giraffes="very-long-necked-animals";
709\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
710
R David Murray3a6152f2011-03-14 21:13:03 -0400711 def test_header_encode_with_different_output_charset(self):
712 h = Header('文', 'euc-jp')
713 self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=")
714
715 def test_long_header_encode_with_different_output_charset(self):
716 h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4'
717 b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4'
718 b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4'
719 b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp')
720 res = """\
721=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?=
722 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?="""
723 self.assertEqual(h.encode(), res)
724
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000725 def test_header_splitter(self):
726 eq = self.ndiffAssertEqual
727 msg = MIMEText('')
728 # It'd be great if we could use add_header() here, but that doesn't
729 # guarantee an order of the parameters.
730 msg['X-Foobar-Spoink-Defrobnit'] = (
731 'wasnipoop; giraffes="very-long-necked-animals"; '
732 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
733 sfp = StringIO()
734 g = Generator(sfp)
735 g.flatten(msg)
736 eq(sfp.getvalue(), '''\
737Content-Type: text/plain; charset="us-ascii"
738MIME-Version: 1.0
739Content-Transfer-Encoding: 7bit
740X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
741 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
742
743''')
744
def test_no_semis_header_splitter(self):
    # A long header without any semicolons is folded on whitespace.
    msg = Message()
    msg['From'] = 'test@dom.ain'
    references = ['<%d@dom.ain>' % i for i in range(10)]
    msg['References'] = SPACE.join(references)
    msg.set_payload('Test')
    out = StringIO()
    Generator(out).flatten(msg)
    self.ndiffAssertEqual(out.getvalue(), """\
From: test@dom.ain
References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>

Test""")
760
def test_last_split_chunk_does_not_fit(self):
    # The final, unsplittable chunk is longer than the line limit, so it
    # is expected on a continuation line of its own.
    value = ('Subject: the first part of this is short, but_the_second'
             '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
             '_all_by_itself')
    expected = """\
Subject: the first part of this is short,
 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself"""
    self.ndiffAssertEqual(Header(value).encode(), expected)
769
def test_splittable_leading_char_followed_by_overlong_unsplitable(self):
    # A lone leading split character stays on the first line; the
    # over-long token that follows goes onto the continuation line.
    value = (', but_the_second'
             '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
             '_all_by_itself')
    expected = """\
,
 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself"""
    self.ndiffAssertEqual(Header(value).encode(), expected)
778
def test_multiple_splittable_leading_char_followed_by_overlong_unsplitable(self):
    # Several leading split characters stay together on the first line;
    # the over-long token follows on the continuation line.
    value = (', , but_the_second'
             '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
             '_all_by_itself')
    expected = """\
, ,
 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself"""
    self.ndiffAssertEqual(Header(value).encode(), expected)
787
def test_trailing_splitable_on_overlong_unsplitable(self):
    # An over-long token with only a trailing split character must be
    # emitted unchanged on a single line.
    value = ('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
             'be_on_a_line_all_by_itself;')
    expected = ("this_part_does_not_fit_within_maxlinelen_and_thus_should_"
                "be_on_a_line_all_by_itself;")
    self.ndiffAssertEqual(Header(value).encode(), expected)
794
def test_trailing_splitable_on_overlong_unsplitable_with_leading_splitable(self):
    # The leading ';' stays on the first line; the over-long token keeps
    # its trailing ';' on the continuation line.
    value = ('; '
             'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
             'be_on_a_line_all_by_itself;')
    expected = """\
;
 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;"""
    self.ndiffAssertEqual(Header(value).encode(), expected)
803
def test_long_header_with_multiple_sequential_split_chars(self):
    # Issue 11492: two split characters (spaces) in a row used to cause
    # truncation of the header when it was folded.
    #
    # The doubled spaces after "whitespaces" and after "row." are the
    # whole point of this test; with single spaces the regression is not
    # exercised at all.  Do not "tidy" them.
    eq = self.ndiffAssertEqual
    h = Header('This is a long line that has two whitespaces  in a row.  '
               'This used to cause truncation of the header when folded')
    # Folding must keep every character, including both doubled spaces.
    eq(h.encode(), """\
This is a long line that has two whitespaces  in a row.  This used to cause
 truncation of the header when folded""")
813
def test_no_split_long_header(self):
    # An 80-character run with no split points can never be broken up.
    unbroken = 'x' * 80
    check = self.ndiffAssertEqual
    header = Header('References: ' + unbroken)
    # These come on two lines because Headers are really field value
    # classes and don't really know about their field names.
    check(header.encode(), 'References:\n ' + unbroken)
    # With nothing in front of it the run is emitted as-is.
    header = Header(unbroken)
    check(header.encode(), unbroken)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000825
def test_splitting_multiple_long_lines(self):
    # Three long lines, each folded independently at its semicolons.  The
    # original line breaks (and their leading tabs) are kept.
    source = """\
from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
"""
    header = Header(source, continuation_ws='\t')
    expected = """\
from babylon.socal-raves.org (localhost [127.0.0.1]);
 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
 for <mailman-admin@babylon.socal-raves.org>;
 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
 for <mailman-admin@babylon.socal-raves.org>;
 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
 for <mailman-admin@babylon.socal-raves.org>;
 Sat, 2 Feb 2002 17:00:06 -0800 (PST)"""
    self.ndiffAssertEqual(header.encode(), expected)
847
def test_splitting_first_line_only_is_long(self):
    # Only the first physical line exceeds the limit; the already-short
    # continuation lines must pass through untouched.
    source = """\
from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
\tid 17k4h5-00034i-00
\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
    header = Header(source, maxlinelen=78, header_name='Received',
                    continuation_ws='\t')
    expected = """\
from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
 helo=cthulhu.gerg.ca)
\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
\tid 17k4h5-00034i-00
\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
    self.ndiffAssertEqual(header.encode(), expected)
863
def test_long_8bit_header(self):
    # An iso-8859-1 subject built in two pieces: checked on its own, then
    # inside a message with and without a header length limit.
    check = self.ndiffAssertEqual
    subject = Header('Britische Regierung gibt', 'iso-8859-1',
                     header_name='Subject')
    subject.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
    check(subject.encode(maxlinelen=76), """\
=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
    msg = Message()
    msg['Subject'] = subject
    check(msg.as_string(maxheaderlen=76), """\
Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
 =?iso-8859-1?q?hore-Windkraftprojekte?=

""")
    # maxheaderlen=0 disables folding: one encoded word on one line.
    check(msg.as_string(maxheaderlen=0), """\
Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=

""")
883
def test_long_8bit_header_no_charset(self):
    # Non-ASCII text assigned as a plain string cannot be serialised;
    # wrapped in a utf-8 Header it is folded into encoded words.
    text = ('Britische Regierung gibt gr\xfcnes Licht '
            'f\xfcr Offshore-Windkraftprojekte '
            '<a-very-long-address@example.com>')
    bare = Message()
    bare['Reply-To'] = text
    self.assertRaises(UnicodeEncodeError, bare.as_string)
    wrapped = Message()
    wrapped['Reply-To'] = Header(text, 'utf-8', header_name='Reply-To')
    self.ndiffAssertEqual(wrapped.as_string(maxheaderlen=78), """\
Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=

""")
900
def test_long_to_header(self):
    # A long address list is folded after commas that are followed by
    # whitespace; the first two addresses have no such gap and stay
    # together.
    recipients = ('"Someone Test #A" <someone@eecs.umich.edu>,'
                  '<someone@eecs.umich.edu>,'
                  '"Someone Test #B" <someone@umich.edu>, '
                  '"Someone Test #C" <someone@eecs.umich.edu>, '
                  '"Someone Test #D" <someone@eecs.umich.edu>')
    msg = Message()
    msg['To'] = recipients
    self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), '''\
To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
 "Someone Test #B" <someone@umich.edu>,
 "Someone Test #C" <someone@eecs.umich.edu>,
 "Someone Test #D" <someone@eecs.umich.edu>

''')
917
def test_long_line_after_append(self):
    # Appending to a value that is already close to the limit pushes the
    # appended chunk onto a continuation line.
    first = 'This is an example of string which has almost the limit of header length.'
    header = Header(first)
    header.append('Add another line.')
    expected = """\
This is an example of string which has almost the limit of header length.
 Add another line."""
    self.ndiffAssertEqual(header.encode(maxlinelen=76), expected)
926
def test_shorter_line_with_append(self):
    # When everything fits, appended chunks are joined with one space.
    header = Header('This is a shorter line.')
    header.append('Add another sentence. (Surprise?)')
    expected = 'This is a shorter line. Add another sentence. (Surprise?)'
    self.ndiffAssertEqual(header.encode(), expected)
934
def test_long_field_name(self):
    # A long header name is passed as header_name; the expected first
    # encoded word is short and the rest fill their lines.
    field_name = 'X-Very-Very-Very-Long-Header-Name'
    german = ('Die Mieter treten hier ein werden mit einem Foerderband '
              'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
              'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
              'bef\xf6rdert. ')
    header = Header(german, 'iso-8859-1', header_name=field_name)
    # BAW: this seems broken because the first line is too long
    expected = """\
=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?="""
    self.ndiffAssertEqual(header.encode(maxlinelen=76), expected)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000949
def test_long_received_header(self):
    # The same value is stored as a Header instance and as a plain
    # string; both are expected to fold identically.
    value = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
             'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
             'Wed, 05 Mar 2003 18:10:18 -0700')
    msg = Message()
    msg['Received-1'] = Header(value, continuation_ws='\t')
    msg['Received-2'] = value
    # This should be splitting on spaces not semicolons.
    flattened = msg.as_string(maxheaderlen=78)
    self.ndiffAssertEqual(flattened, """\
Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
 Wed, 05 Mar 2003 18:10:18 -0700
Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
 Wed, 05 Mar 2003 18:10:18 -0700

""")
965
def test_string_headerinst_eq(self):
    # A Header instance and the equivalent plain string must serialise
    # to the same folded text.
    value = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
             'tu-muenchen.de> (David Bremner\'s message of '
             '"Thu, 6 Mar 2003 13:58:21 +0100")')
    msg = Message()
    msg['Received-1'] = Header(value, header_name='Received-1',
                               continuation_ws='\t')
    msg['Received-2'] = value
    # XXX This should be splitting on spaces not commas.
    flattened = msg.as_string(maxheaderlen=78)
    self.ndiffAssertEqual(flattened, """\
Received-1: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner's message of \"Thu,
 6 Mar 2003 13:58:21 +0100\")
Received-2: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner's message of \"Thu,
 6 Mar 2003 13:58:21 +0100\")

""")
982
def test_long_unbreakable_lines_with_continuation(self):
    # Two unbreakable lines, the second already a continuation line,
    # stored once as a plain string and once as a Header.
    face = """\
iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
    msg = Message()
    msg['Face-1'] = face
    msg['Face-2'] = Header(face, header_name='Face-2')
    # XXX This splitting is all wrong.  The first value line should be
    # snug against the field name.
    flattened = msg.as_string(maxheaderlen=78)
    self.ndiffAssertEqual(flattened, """\
Face-1:\x20
 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Face-2:\x20
 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp

""")
1002
def test_another_long_multiline_header(self):
    # A Received header parsed from text is re-folded after its
    # semicolon when the message is flattened again.
    raw = ('Received: from siimage.com '
           '([172.25.1.3]) by zima.siliconimage.com with '
           'Microsoft SMTPSVC(5.0.2195.4905); '
           'Wed, 16 Oct 2002 07:41:11 -0700')
    parsed = email.message_from_string(raw)
    self.ndiffAssertEqual(parsed.as_string(maxheaderlen=78), '''\
Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with Microsoft SMTPSVC(5.0.2195.4905);
 Wed, 16 Oct 2002 07:41:11 -0700

''')
1015
def test_long_lines_with_different_header(self):
    # The value itself starts with what looks like another field name;
    # it is added twice, as a string and as a Header, and both copies
    # must fold the same way.
    value = ('List-Unsubscribe: '
             '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
             ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
             '?subject=unsubscribe>')
    msg = Message()
    msg['List'] = value
    msg['List'] = Header(value, header_name='List')
    flattened = msg.as_string(maxheaderlen=78)
    self.ndiffAssertEqual(flattened, """\
List: List-Unsubscribe: <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
List: List-Unsubscribe: <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>

""")
1032
def test_long_rfc2047_header_with_embedded_fws(self):
    # Wrapping of encoded words when the value already contains folding
    # white space (a newline followed by a tab or spaces).
    #
    # NOTE: the *relative* indentation inside both dedent() literals is
    # significant.  The third input line is indented deeper than the
    # margin so that, after dedent(), it still begins with white space
    # (the embedded FWS under test).  Likewise every expected
    # continuation line keeps one leading space after dedent().
    h = Header(textwrap.dedent("""\
        We're going to pretend this header is in a non-ascii character set
        \tto see if line wrapping with encoded words and embedded
           folding white space works"""),
               charset='utf-8',
               header_name='Test')
    # A trailing newline is added to both sides, as in the original.
    self.assertEqual(h.encode()+'\n', textwrap.dedent("""\
        =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
         =?utf-8?q?cter_set?=
         =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
         =?utf-8?q?_folding_white_space_works?=""")+'\n')
1045
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001046
Ezio Melottib3aedd42010-11-20 19:04:17 +00001047
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001048# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    # The fixture body starts with "From ", the prefix the Generator
    # escapes as ">From " when mangle_from_ is true.

    def setUp(self):
        message = Message()
        message['From'] = 'aaa@bbb.org'
        message.set_payload("""\
From the desk of A.A.A.:
Blah blah blah
""")
        self.msg = message

    def _flatten(self, mangle):
        # Serialise the fixture message with the given mangle_from_ flag.
        buf = StringIO()
        Generator(buf, mangle_from_=mangle).flatten(self.msg)
        return buf.getvalue()

    def test_mangled_from(self):
        # With mangling on, the leading "From " gains a ">" prefix.
        self.assertEqual(self._flatten(True), """\
From: aaa@bbb.org

>From the desk of A.A.A.:
Blah blah blah
""")

    def test_dont_mangle_from(self):
        # With mangling off, the body is written out verbatim.
        self.assertEqual(self._flatten(False), """\
From: aaa@bbb.org

From the desk of A.A.A.:
Blah blah blah
""")
1079
1080
Ezio Melottib3aedd42010-11-20 19:04:17 +00001081
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001082# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    # Exercises MIMEAudio built from the raw bytes of 'audiotest.au'.

    def setUp(self):
        # Keep the raw bytes so the payload can be compared against them.
        with openfile('audiotest.au', 'rb') as fp:
            self._audiodata = fp.read()
        self._au = MIMEAudio(self._audiodata)

    def test_guess_minor_type(self):
        # No subtype was given, so it must be guessed from the data.
        self.assertEqual(self._au.get_content_type(), 'audio/basic')

    def test_encoding(self):
        # The payload is base64 text that decodes back to the input bytes.
        payload = self._au.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._audiodata)

    def test_checkSetMinor(self):
        # An explicit subtype overrides guessing.
        au = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(au.get_content_type(), 'audio/fish')

    def test_add_header(self):
        eq = self.assertEqual
        self._au.add_header('Content-Disposition', 'attachment',
                            filename='audiotest.au')
        eq(self._au['content-disposition'],
           'attachment; filename="audiotest.au"')
        eq(self._au.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'audiotest.au')])
        eq(self._au.get_param('filename', header='content-disposition'),
           'audiotest.au')
        # Unique sentinel: proves the failobj itself is handed back.
        missing = []
        eq(self._au.get_param('attachment', header='content-disposition'), '')
        # assertIs reports both objects on failure, unlike assertTrue(a is b).
        self.assertIs(self._au.get_param('foo', failobj=missing,
                                         header='content-disposition'),
                      missing)
        # Try some missing stuff
        self.assertIs(self._au.get_param('foobar', missing), missing)
        self.assertIs(self._au.get_param('attachment', missing,
                                         header='foobar'),
                      missing)
1120
1121
Ezio Melottib3aedd42010-11-20 19:04:17 +00001122
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001123# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    # Exercises MIMEImage built from the raw bytes of 'PyBanner048.gif'.

    def setUp(self):
        # Keep the raw bytes so the payload can be compared against them.
        with openfile('PyBanner048.gif', 'rb') as fp:
            self._imgdata = fp.read()
        self._im = MIMEImage(self._imgdata)

    def test_guess_minor_type(self):
        # No subtype was given, so it must be guessed from the data.
        self.assertEqual(self._im.get_content_type(), 'image/gif')

    def test_encoding(self):
        # The payload is base64 text that decodes back to the input bytes.
        payload = self._im.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._imgdata)

    def test_checkSetMinor(self):
        # An explicit subtype overrides guessing.
        im = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(im.get_content_type(), 'image/fish')

    def test_add_header(self):
        eq = self.assertEqual
        self._im.add_header('Content-Disposition', 'attachment',
                            filename='dingusfish.gif')
        eq(self._im['content-disposition'],
           'attachment; filename="dingusfish.gif"')
        eq(self._im.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'dingusfish.gif')])
        eq(self._im.get_param('filename', header='content-disposition'),
           'dingusfish.gif')
        # Unique sentinel: proves the failobj itself is handed back.
        missing = []
        eq(self._im.get_param('attachment', header='content-disposition'), '')
        # assertIs reports both objects on failure, unlike assertTrue(a is b).
        self.assertIs(self._im.get_param('foo', failobj=missing,
                                         header='content-disposition'),
                      missing)
        # Try some missing stuff
        self.assertIs(self._im.get_param('foobar', missing), missing)
        self.assertIs(self._im.get_param('attachment', missing,
                                         header='foobar'),
                      missing)
1161
1162
Ezio Melottib3aedd42010-11-20 19:04:17 +00001163
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001164# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    # MIMEApplication with six high-bit bytes as its payload.

    def test_headers(self):
        # Defaults: application/octet-stream, base64 transfer encoding.
        part = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        self.assertEqual(part.get_content_type(), 'application/octet-stream')
        self.assertEqual(part['content-transfer-encoding'], 'base64')

    def test_body(self):
        raw = b'\xfa\xfb\xfc\xfd\xfe\xff'
        part = MIMEApplication(raw)
        # whitespace in the cte encoded block is RFC-irrelevant.
        encoded = part.get_payload().strip()
        self.assertEqual(encoded, '+vv8/f7/')
        # Decoding the payload gives back the original bytes.
        self.assertEqual(part.get_payload(decode=True), raw)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001179
1180
Ezio Melottib3aedd42010-11-20 19:04:17 +00001181
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001182# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    # Exercises MIMEText: content type, charset handling and payload.

    def setUp(self):
        self._msg = MIMEText('hello there')

    def test_types(self):
        eq = self.assertEqual
        eq(self._msg.get_content_type(), 'text/plain')
        eq(self._msg.get_param('charset'), 'us-ascii')
        # Unique sentinel: proves the failobj itself is handed back.
        missing = []
        self.assertIs(self._msg.get_param('foobar', missing), missing)
        self.assertIs(self._msg.get_param('charset', missing,
                                          header='foobar'),
                      missing)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        # NOTE(review): currently identical to test_charset.
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there')
        # Compares the charset object returned by get_charset() with a str.
        eq(msg.get_charset(), 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # assertIn shows both strings on failure.
        self.assertIn('hello there', msg.as_string())

    def test_utf8_input(self):
        # Cyrillic text with an explicit utf-8 charset.
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        eq = self.assertEqual
        msg = MIMEText(teststr, _charset='utf-8')
        eq(msg.get_charset().output_charset, 'utf-8')
        eq(msg['content-type'], 'text/plain; charset="utf-8"')
        eq(msg.get_payload(decode=True), teststr.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
                   "will fix in email6")
    def test_utf8_input_no_charset(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
1233
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001234
Ezio Melottib3aedd42010-11-20 19:04:17 +00001235
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001236# Test complicated multipart/* messages
1237class TestMultipart(TestEmailBase):
def setUp(self):
    # Build the shared fixture: a multipart/mixed container with boundary
    # 'BOUNDARY' holding a text intro and a GIF attachment, plus From,
    # To, Subject and Date headers.  The container and both parts are
    # kept on self for the tests.
    with openfile('PyBanner048.gif', 'rb') as fp:
        data = fp.read()
    container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
    image = MIMEImage(data, name='dingusfish.gif')
    image.add_header('content-disposition', 'attachment',
                     filename='dingusfish.gif')
    intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
    container.attach(intro)
    container.attach(image)
    container['From'] = 'Barry <barry@digicool.com>'
    container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
    container['Subject'] = 'Here is your dingus fish'

    now = 987809702.54848599
    timetuple = time.localtime(now)
    # time.timezone / time.altzone are seconds *west* of UTC, so a
    # positive value means a negative ("-hhmm") offset.
    if timetuple.tm_isdst == 0:
        tzsecs = time.timezone
    else:
        tzsecs = time.altzone
    sign = '-' if tzsecs > 0 else '+'
    # Format from the absolute value, split into hours and minutes.  The
    # former '%04d' % (tzsecs / 36) gave e.g. "+-100" east of UTC and
    # wrong digits for offsets that are not a whole number of hours.
    hours, minutes = divmod(abs(tzsecs) // 60, 60)
    tzoffset = ' %s%02d%02d' % (sign, hours, minutes)
    container['Date'] = time.strftime(
        '%a, %d %b %Y %H:%M:%S', timetuple) + tzoffset
    self._msg = container
    self._im = image
    self._txt = intro
1273
def test_hierarchy(self):
    # The container built in setUp must expose exactly its two attached
    # parts, in order, as the very same objects.
    eq = self.assertEqual
    m = self._msg
    self.assertTrue(m.is_multipart())
    eq(m.get_content_type(), 'multipart/mixed')
    eq(len(m.get_payload()), 2)
    # Only indexes 0 and 1 exist.
    self.assertRaises(IndexError, m.get_payload, 2)
    m0 = m.get_payload(0)
    m1 = m.get_payload(1)
    # assertIs reports both objects on failure, unlike assertTrue(a is b).
    self.assertIs(m0, self._txt)
    self.assertIs(m1, self._im)
    eq(m.get_payload(), [m0, m1])
    self.assertFalse(m0.is_multipart())
    self.assertFalse(m1.is_multipart())
1292
def test_empty_multipart_idempotent(self):
    # Parsing and re-serialising a multipart with a single empty part
    # must give back the input text exactly.
    source = """\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY


--BOUNDARY--
"""
    parsed = Parser().parsestr(source)
    self.ndiffAssertEqual(source, parsed.as_string())
1309
def test_no_parts_in_a_multipart_with_none_epilogue(self):
    # No parts attached and preamble/epilogue left untouched: the output
    # ends at the closing boundary with no trailing newline.
    container = MIMEBase('multipart', 'mixed')
    for name, value in (('Subject', 'A subject'),
                        ('To', 'aperson@dom.ain'),
                        ('From', 'bperson@dom.ain')):
        container[name] = value
    container.set_boundary('BOUNDARY')
    self.ndiffAssertEqual(container.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY

--BOUNDARY--''')
1326
def test_no_parts_in_a_multipart_with_empty_epilogue(self):
    # Empty-string preamble and epilogue each add one newline: an extra
    # blank line before the first boundary and one after the last.
    container = MIMEBase('multipart', 'mixed')
    for name, value in (('Subject', 'A subject'),
                        ('To', 'aperson@dom.ain'),
                        ('From', 'bperson@dom.ain')):
        container[name] = value
    container.preamble = ''
    container.epilogue = ''
    container.set_boundary('BOUNDARY')
    self.ndiffAssertEqual(container.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY

--BOUNDARY--
''')
1347
def test_one_part_in_a_multipart(self):
    # One text part; the boundary is set before the part is attached.
    container = MIMEBase('multipart', 'mixed')
    for name, value in (('Subject', 'A subject'),
                        ('To', 'aperson@dom.ain'),
                        ('From', 'bperson@dom.ain')):
        container[name] = value
    container.set_boundary('BOUNDARY')
    container.attach(MIMEText('hello world'))
    self.ndiffAssertEqual(container.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')
1371
def test_seq_parts_in_a_multipart_with_empty_preamble(self):
    # An empty-string preamble produces one extra blank line before the
    # first boundary.
    container = MIMEBase('multipart', 'mixed')
    for name, value in (('Subject', 'A subject'),
                        ('To', 'aperson@dom.ain'),
                        ('From', 'bperson@dom.ain')):
        container[name] = value
    container.preamble = ''
    container.attach(MIMEText('hello world'))
    container.set_boundary('BOUNDARY')
    self.ndiffAssertEqual(container.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')
1397
1398
def test_seq_parts_in_a_multipart_with_none_preamble(self):
    # A preamble of None adds nothing before the first boundary.
    container = MIMEBase('multipart', 'mixed')
    for name, value in (('Subject', 'A subject'),
                        ('To', 'aperson@dom.ain'),
                        ('From', 'bperson@dom.ain')):
        container[name] = value
    container.preamble = None
    container.attach(MIMEText('hello world'))
    container.set_boundary('BOUNDARY')
    self.ndiffAssertEqual(container.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')
1423
1424
def test_seq_parts_in_a_multipart_with_none_epilogue(self):
    # An epilogue of None adds nothing after the closing boundary.
    container = MIMEBase('multipart', 'mixed')
    for name, value in (('Subject', 'A subject'),
                        ('To', 'aperson@dom.ain'),
                        ('From', 'bperson@dom.ain')):
        container[name] = value
    container.epilogue = None
    container.attach(MIMEText('hello world'))
    container.set_boundary('BOUNDARY')
    self.ndiffAssertEqual(container.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')
1449
1450
def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
    # An empty-string epilogue produces a single newline after the
    # closing boundary.
    container = MIMEBase('multipart', 'mixed')
    for name, value in (('Subject', 'A subject'),
                        ('To', 'aperson@dom.ain'),
                        ('From', 'bperson@dom.ain')):
        container[name] = value
    container.epilogue = ''
    container.attach(MIMEText('hello world'))
    container.set_boundary('BOUNDARY')
    self.ndiffAssertEqual(container.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')
1476
1477
def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
    # An epilogue of '\n' leaves a blank line after the closing boundary.
    container = MIMEBase('multipart', 'mixed')
    for name, value in (('Subject', 'A subject'),
                        ('To', 'aperson@dom.ain'),
                        ('From', 'bperson@dom.ain')):
        container[name] = value
    container.epilogue = '\n'
    container.attach(MIMEText('hello world'))
    container.set_boundary('BOUNDARY')
    self.ndiffAssertEqual(container.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--

''')
1504
def test_message_external_body(self):
    # msg_36.txt: the second top-level part is a multipart/alternative
    # whose two children are message/external-body parts, each wrapping
    # one text/plain message.
    check = self.assertEqual
    msg = self._msgobj('msg_36.txt')
    check(len(msg.get_payload()), 2)
    alternative = msg.get_payload(1)
    check(alternative.get_content_type(), 'multipart/alternative')
    children = alternative.get_payload()
    check(len(children), 2)
    for child in children:
        check(child.get_content_type(), 'message/external-body')
        check(len(child.get_payload()), 1)
        inner = child.get_payload(0)
        check(inner.get_content_type(), 'text/plain')
1517
def test_double_boundary(self):
    # msg_37.txt is a multipart that contains two dash-boundary's in a
    # row.  Our interpretation of RFC 2046 calls for ignoring the second
    # and subsequent boundaries.
    parts = self._msgobj('msg_37.txt').get_payload()
    self.assertEqual(len(parts), 3)
1524
def test_nested_inner_contains_outer_boundary(self):
    eq = self.ndiffAssertEqual
    # msg_38.txt has an inner part that contains outer boundaries.  My
    # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
    # these are illegal and should be interpreted as unterminated inner
    # parts.
    msg = self._msgobj('msg_38.txt')
    sfp = StringIO()
    iterators._structure(msg, sfp)
    # The indentation below encodes the nesting depth and is part of the
    # expected value; the levels are written as 4 spaces each (see
    # email.iterators._structure).  Do not flatten it.
    eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
            text/plain
        text/plain
    text/plain
    text/plain
""")
1543
def test_nested_with_same_boundary(self):
    eq = self.ndiffAssertEqual
    # msg_39.txt is similarly evil in that it's got inner parts that use
    # the same boundary as outer parts.  Again, I believe the way this is
    # parsed is closest to the spirit of RFC 2046.
    msg = self._msgobj('msg_39.txt')
    sfp = StringIO()
    iterators._structure(msg, sfp)
    # _structure() indents every nesting level by four spaces, so the
    # expected text has to carry that indentation to compare equal.
    # NOTE(review): nesting depths were restored by hand -- confirm them
    # against msg_39.txt.
    eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
            application/octet-stream
            application/octet-stream
        text/plain
""")
1560
def test_boundary_in_non_multipart(self):
    # msg_40.txt is text/html with a boundary parameter; flattening the
    # parsed message is expected to give back the text below verbatim.
    expected = '''\
MIME-Version: 1.0
Content-Type: text/html; boundary="--961284236552522269"

----961284236552522269
Content-Type: text/html;
Content-Transfer-Encoding: 7Bit

<html></html>

----961284236552522269--
'''
    parsed = self._msgobj('msg_40.txt')
    self.assertEqual(parsed.as_string(), expected)
1575
1576 def test_boundary_with_leading_space(self):
1577 eq = self.assertEqual
1578 msg = email.message_from_string('''\
1579MIME-Version: 1.0
1580Content-Type: multipart/mixed; boundary=" XXXX"
1581
1582-- XXXX
1583Content-Type: text/plain
1584
1585
1586-- XXXX
1587Content-Type: text/plain
1588
1589-- XXXX--
1590''')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001591 self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001592 eq(msg.get_boundary(), ' XXXX')
1593 eq(len(msg.get_payload()), 2)
1594
1595 def test_boundary_without_trailing_newline(self):
1596 m = Parser().parsestr("""\
1597Content-Type: multipart/mixed; boundary="===============0012394164=="
1598MIME-Version: 1.0
1599
1600--===============0012394164==
1601Content-Type: image/file1.jpg
1602MIME-Version: 1.0
1603Content-Transfer-Encoding: base64
1604
1605YXNkZg==
1606--===============0012394164==--""")
Ezio Melottib3aedd42010-11-20 19:04:17 +00001607 self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001608
1609
Ezio Melottib3aedd42010-11-20 19:04:17 +00001610
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001611# Test some badly formatted messages
class TestNonConformant(TestEmailBase):
    # Each test feeds the parser (or the Message API) input that breaks the
    # RFCs in some way and checks the resulting model and recorded defects.

    def test_parse_missing_minor_type(self):
        # msg_14.txt is expected to be reported as text/plain.
        eq = self.assertEqual
        msg = self._msgobj('msg_14.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')

    def test_same_boundary_inner_outer(self):
        msg = self._msgobj('msg_15.txt')
        # XXX We can probably eventually do better
        inner = msg.get_payload(0)
        self.assertTrue(hasattr(inner, 'defects'))
        self.assertEqual(len(inner.defects), 1)
        self.assertIsInstance(inner.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    def test_multipart_no_boundary(self):
        # msg_25.txt: the payload stays a plain string and two defects
        # are recorded, in this order.
        msg = self._msgobj('msg_25.txt')
        self.assertIsInstance(msg.get_payload(), str)
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    def test_invalid_content_type(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        msg = Message()
        # RFC 2045, $5.2 says invalid yields text/plain
        msg['Content-Type'] = 'text'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Clear the old value and try something /really/ invalid
        del msg['content-type']
        msg['Content-Type'] = 'foo'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Still, make sure that the message is idempotently generated
        s = StringIO()
        g = Generator(s)
        g.flatten(msg)
        neq(s.getvalue(), 'Content-Type: foo\n\n')

    def test_no_start_boundary(self):
        # msg_31.txt: the whole body, boundary lines included, is expected
        # back as a single string payload.
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_31.txt')
        eq(msg.get_payload(), """\
--BOUNDARY
Content-Type: text/plain

message 1

--BOUNDARY
Content-Type: text/plain

message 2

--BOUNDARY--
""")

    def test_no_separating_blank_line(self):
        # msg_35.txt has no blank line between headers and body; the
        # flattened output is expected to contain one.
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_35.txt')
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: here's something interesting

counter to RFC 2822, there's no separating newline here
""")

    def test_lying_multipart(self):
        msg = self._msgobj('msg_41.txt')
        self.assertTrue(hasattr(msg, 'defects'))
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    def test_missing_start_boundary(self):
        outer = self._msgobj('msg_42.txt')
        # The message structure is:
        #
        # multipart/mixed
        #    text/plain
        #    message/rfc822
        #        multipart/mixed [*]
        #
        # [*] This message is missing its start boundary
        bad = outer.get_payload(1).get_payload(0)
        self.assertEqual(len(bad.defects), 1)
        self.assertIsInstance(bad.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    def test_first_line_is_continuation_header(self):
        eq = self.assertEqual
        m = ' Line 1\nLine 2\nLine 3'
        msg = email.message_from_string(m)
        eq(msg.keys(), [])
        eq(msg.get_payload(), 'Line 2\nLine 3')
        eq(len(msg.defects), 1)
        self.assertIsInstance(msg.defects[0],
                              errors.FirstHeaderLineIsContinuationDefect)
        eq(msg.defects[0].line, ' Line 1\n')
1722
1723
Ezio Melottib3aedd42010-11-20 19:04:17 +00001724
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001725# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    # Exercises decode_header()/make_header() on RFC 2047 encoded words.

    def test_rfc2047_multiline(self):
        eq = self.assertEqual
        s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
        dh = decode_header(s)
        eq(dh, [
            (b'Re:', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            (b'baz foo bar', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
        header = make_header(dh)
        eq(str(header),
           'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""")

    def test_whitespace_eater_unicode(self):
        eq = self.assertEqual
        s = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        dh = decode_header(s)
        eq(dh, [(b'Andr\xe9', 'iso-8859-1'),
                (b'Pirard <pirard@dom.ain>', None)])
        header = str(make_header(dh))
        eq(header, 'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_eater_unicode_2(self):
        eq = self.assertEqual
        s = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?='
        dh = decode_header(s)
        eq(dh, [(b'The', None), (b'quick brown fox', 'iso-8859-1'),
                (b'jumped over the', None), (b'lazy dog', 'iso-8859-1')])
        hu = str(make_header(dh))
        eq(hu, 'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        # Encoded words that are not delimited by whitespace are expected
        # to come back undecoded, as one chunk.
        s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        dh = decode_header(s)
        self.assertEqual(dh, [(s, None)])

    def test_rfc2047_with_whitespace(self):
        s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        dh = decode_header(s)
        self.assertEqual(dh, [(b'Sm', None), (b'\xf6', 'iso-8859-1'),
                              (b'rg', None), (b'\xe5', 'iso-8859-1'),
                              (b'sbord', None)])

    def test_rfc2047_B_bad_padding(self):
        s = '=?iso-8859-1?B?%s?='
        data = [                                # only test complete bytes
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi'),
        ]
        for q, a in data:
            dh = decode_header(s % q)
            self.assertEqual(dh, [(a, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004.  "=zz" is not a valid hex escape and must be passed
        # through untouched; the charset is the real iso-8859-1 (the
        # earlier spelling "iso-8659-1" was a typo).
        s = '=?iso-8859-1?Q?andr=e9=zz?='
        self.assertEqual(decode_header(s),
                         [(b'andr\xe9=zz', 'iso-8859-1')])
1789
Ezio Melottib3aedd42010-11-20 19:04:17 +00001790
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001791# Test the MIMEMessage class
class TestMIMEMessage(TestEmailBase):
    # Covers MIMEMessage construction, generation and parsing of
    # message/* and multipart containers, and default content types.

    def setUp(self):
        with openfile('msg_11.txt') as fp:
            self._text = fp.read()

    def test_type_error(self):
        # MIMEMessage only accepts Message instances.
        self.assertRaises(TypeError, MIMEMessage, 'a plain string')

    def test_valid_argument(self):
        eq = self.assertEqual
        subject = 'A sub-message'
        m = Message()
        m['Subject'] = subject
        r = MIMEMessage(m)
        eq(r.get_content_type(), 'message/rfc822')
        payload = r.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subpart = payload[0]
        # The wrapped message must be the very object that was passed in.
        self.assertIs(subpart, m)
        eq(subpart['subject'], subject)

    def test_bad_multipart(self):
        # A MIMEMessage already holds its one message; attaching a second
        # one must be refused.
        msg1 = Message()
        msg1['Subject'] = 'subpart 1'
        msg2 = Message()
        msg2['Subject'] = 'subpart 2'
        r = MIMEMessage(msg1)
        self.assertRaises(errors.MultipartConversionError, r.attach, msg2)

    def test_generate(self):
        # First craft the message to be encapsulated
        m = Message()
        m['Subject'] = 'An enclosed message'
        m.set_payload('Here is the body of the message.\n')
        r = MIMEMessage(m)
        r['Subject'] = 'The enclosing message'
        s = StringIO()
        g = Generator(s)
        g.flatten(r)
        self.assertEqual(s.getvalue(), """\
Content-Type: message/rfc822
MIME-Version: 1.0
Subject: The enclosing message

Subject: An enclosed message

Here is the body of the message.
""")

    def test_parse_message_rfc822(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_11.txt')
        eq(msg.get_content_type(), 'message/rfc822')
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        submsg = payload[0]
        self.assertIsInstance(submsg, Message)
        eq(submsg['subject'], 'An enclosed message')
        eq(submsg.get_payload(), 'Here is the body of the message.\n')

    def test_dsn(self):
        eq = self.assertEqual
        # msg 16 is a Delivery Status Notification, see RFC 1894
        msg = self._msgobj('msg_16.txt')
        eq(msg.get_content_type(), 'multipart/report')
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 3)
        # Subpart 1 is a text/plain, human readable section
        subpart = msg.get_payload(0)
        eq(subpart.get_content_type(), 'text/plain')
        # NOTE(review): the leading whitespace of the indented lines in the
        # literal below has to match msg_16.txt byte for byte -- confirm
        # against the fixture.
        eq(subpart.get_payload(), """\
This report relates to a message you sent with the following header fields:

 Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
 Date: Sun, 23 Sep 2001 20:10:55 -0700
 From: "Ian T. Henry" <henryi@oxy.edu>
 To: SoCal Raves <scr@socal-raves.org>
 Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

 Recipient address: jangel1@cougar.noc.ucla.edu
 Reason: recipient reached disk quota

""")
        # Subpart 2 contains the machine parsable DSN information.  It
        # consists of two blocks of headers, represented by two nested
        # Message objects.
        subpart = msg.get_payload(1)
        eq(subpart.get_content_type(), 'message/delivery-status')
        eq(len(subpart.get_payload()), 2)
        # message/delivery-status should treat each block as a bunch of
        # headers, i.e. a bunch of Message objects.
        dsn1 = subpart.get_payload(0)
        self.assertIsInstance(dsn1, Message)
        eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
        eq(dsn1.get_param('dns', header='reporting-mta'), '')
        # Try a missing one <wink>
        eq(dsn1.get_param('nsd', header='reporting-mta'), None)
        dsn2 = subpart.get_payload(1)
        self.assertIsInstance(dsn2, Message)
        eq(dsn2['action'], 'failed')
        eq(dsn2.get_params(header='original-recipient'),
           [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
        eq(dsn2.get_param('rfc822', header='final-recipient'), '')
        # Subpart 3 is the original message
        subpart = msg.get_payload(2)
        eq(subpart.get_content_type(), 'message/rfc822')
        payload = subpart.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subsubpart = payload[0]
        self.assertIsInstance(subsubpart, Message)
        eq(subsubpart.get_content_type(), 'text/plain')
        eq(subsubpart['message-id'],
           '<002001c144a6$8752e060$56104586@oxy.edu>')

    def test_epilogue(self):
        # Build by hand the message stored in msg_21.txt and check that it
        # flattens to exactly the file's text.
        eq = self.ndiffAssertEqual
        with openfile('msg_21.txt') as fp:
            text = fp.read()
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = 'End of MIME message\n'
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), text)

    def test_no_nl_preamble(self):
        # The preamble has no trailing newline; the generator is expected
        # to put the first boundary on its own line anyway.
        eq = self.ndiffAssertEqual
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = ''
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
""")

    def test_default_type(self):
        eq = self.assertEqual
        with openfile('msg_30.txt') as fp:
            msg = email.message_from_file(fp)
        container1 = msg.get_payload(0)
        eq(container1.get_default_type(), 'message/rfc822')
        eq(container1.get_content_type(), 'message/rfc822')
        container2 = msg.get_payload(1)
        eq(container2.get_default_type(), 'message/rfc822')
        eq(container2.get_content_type(), 'message/rfc822')
        container1a = container1.get_payload(0)
        eq(container1a.get_default_type(), 'text/plain')
        eq(container1a.get_content_type(), 'text/plain')
        container2a = container2.get_payload(0)
        eq(container2a.get_default_type(), 'text/plain')
        eq(container2a.get_content_type(), 'text/plain')

    def test_default_type_with_explicit_container_type(self):
        eq = self.assertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        container1 = msg.get_payload(0)
        eq(container1.get_default_type(), 'message/rfc822')
        eq(container1.get_content_type(), 'message/rfc822')
        container2 = msg.get_payload(1)
        eq(container2.get_default_type(), 'message/rfc822')
        eq(container2.get_content_type(), 'message/rfc822')
        container1a = container1.get_payload(0)
        eq(container1a.get_default_type(), 'text/plain')
        eq(container1a.get_content_type(), 'text/plain')
        container2a = container2.get_payload(0)
        eq(container2a.get_default_type(), 'text/plain')
        eq(container2a.get_content_type(), 'text/plain')

    def test_default_type_non_parsed(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # Set up container
        container = MIMEMultipart('digest', 'BOUNDARY')
        container.epilogue = ''
        # Set up subparts
        subpart1a = MIMEText('message 1\n')
        subpart2a = MIMEText('message 2\n')
        subpart1 = MIMEMessage(subpart1a)
        subpart2 = MIMEMessage(subpart2a)
        container.attach(subpart1)
        container.attach(subpart2)
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
        # Without explicit headers the subparts must still report
        # message/rfc822, and generate with empty header blocks.
        del subpart1['content-type']
        del subpart1['mime-version']
        del subpart2['content-type']
        del subpart2['mime-version']
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')

    def test_mime_attachments_in_constructor(self):
        eq = self.assertEqual
        text1 = MIMEText('')
        text2 = MIMEText('')
        msg = MIMEMultipart(_subparts=(text1, text2))
        eq(len(msg.get_payload()), 2)
        eq(msg.get_payload(0), text1)
        eq(msg.get_payload(1), text2)

    def test_default_multipart_constructor(self):
        msg = MIMEMultipart()
        self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002090
Ezio Melottib3aedd42010-11-20 19:04:17 +00002091
# A general test of parser->model->generator idempotency.  IOW, read a message
# in, parse it into a message object tree, then without touching the tree,
# regenerate the plain text.  The original text and the transformed text
# should be identical.  Note that the Unix-From line is normally left out of
# the comparison, since it may contain a changed date.
class TestIdempotent(TestEmailBase):
    # Reads sample messages, parses them and checks that regenerating the
    # text from the untouched object tree gives back the input.

    # Line separator expected in parsed payloads; a class attribute so the
    # expectations below are written in terms of self.linesep.
    linesep = '\n'

    def _msgobj(self, filename):
        # Return (parsed message, raw text) for the named sample file.
        with openfile(filename) as fp:
            data = fp.read()
        msg = email.message_from_string(data)
        return msg, data

    def _idempotent(self, msg, text, unixfrom=False):
        # Flatten msg without header wrapping (maxheaderlen=0) and compare
        # the result with the original text.
        eq = self.ndiffAssertEqual
        s = StringIO()
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg, unixfrom=unixfrom)
        eq(text, s.getvalue())

    def _roundtrip(self, filename, unixfrom=False):
        # Parse the named sample file and check that it regenerates
        # unchanged.  Goes through _msgobj/_idempotent so that overriding
        # either of them keeps working.
        msg, text = self._msgobj(filename)
        self._idempotent(msg, text, unixfrom=unixfrom)

    def test_parse_text_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_01.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_params()[1], ('charset', 'us-ascii'))
        eq(msg.get_param('charset'), 'us-ascii')
        eq(msg.preamble, None)
        eq(msg.epilogue, None)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_03.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_params(), None)
        eq(msg.get_param('charset'), None)
        self._idempotent(msg, text)

    def test_simple_multipart(self):
        self._roundtrip('msg_04.txt')

    def test_MIME_digest(self):
        self._roundtrip('msg_02.txt')

    def test_long_header(self):
        self._roundtrip('msg_27.txt')

    def test_MIME_digest_with_part_headers(self):
        self._roundtrip('msg_28.txt')

    def test_mixed_with_image(self):
        self._roundtrip('msg_06.txt')

    def test_multipart_report(self):
        self._roundtrip('msg_05.txt')

    def test_dsn(self):
        self._roundtrip('msg_16.txt')

    def test_preamble_epilogue(self):
        self._roundtrip('msg_21.txt')

    def test_multipart_one_part(self):
        self._roundtrip('msg_23.txt')

    def test_multipart_no_parts(self):
        self._roundtrip('msg_24.txt')

    def test_no_start_boundary(self):
        self._roundtrip('msg_31.txt')

    def test_rfc2231_charset(self):
        self._roundtrip('msg_32.txt')

    def test_more_rfc2231_parameters(self):
        self._roundtrip('msg_33.txt')

    def test_text_plain_in_a_multipart_digest(self):
        self._roundtrip('msg_34.txt')

    def test_nested_multipart_mixeds(self):
        self._roundtrip('msg_12a.txt')

    def test_message_external_body_idempotent(self):
        self._roundtrip('msg_36.txt')

    def test_message_delivery_status(self):
        # This sample is regenerated with its Unix-From line included.
        self._roundtrip('msg_43.txt', unixfrom=True)

    def test_message_signed_idempotent(self):
        self._roundtrip('msg_45.txt')

    def test_content_type(self):
        eq = self.assertEqual
        # Parse the sample; the raw text is not needed by this test.
        msg, text = self._msgobj('msg_05.txt')
        eq(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(msg.get_params())
        eq(params['report-type'], 'delivery-status')
        eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep)
        eq(msg.epilogue, self.linesep)
        eq(len(msg.get_payload()), 3)
        # Make sure the subparts are what we expect
        msg1 = msg.get_payload(0)
        eq(msg1.get_content_type(), 'text/plain')
        eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg2 = msg.get_payload(1)
        eq(msg2.get_content_type(), 'text/plain')
        eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg3 = msg.get_payload(2)
        eq(msg3.get_content_type(), 'message/rfc822')
        self.assertIsInstance(msg3, Message)
        payload = msg3.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg4 = payload[0]
        self.assertIsInstance(msg4, Message)
        eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep)

    def test_parser(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        eq(msg.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and that
        # that submessage has a type of text/plain
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg1 = payload[0]
        self.assertIsInstance(msg1, Message)
        eq(msg1.get_content_type(), 'text/plain')
        self.assertIsInstance(msg1.get_payload(), str)
        eq(msg1.get_payload(), self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002254
2255
Ezio Melottib3aedd42010-11-20 19:04:17 +00002256
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002257# Test various other bits of the package's functionality
2258class TestMiscellaneous(TestEmailBase):
def test_message_from_string(self):
    # Parse msg_01.txt from a string and check that it flattens back to
    # the very same text.
    with openfile('msg_01.txt') as fp:
        original = fp.read()
    parsed = email.message_from_string(original)
    out = StringIO()
    # maxheaderlen=0: don't wrap/continue long headers since we're trying
    # to test idempotency.
    Generator(out, maxheaderlen=0).flatten(parsed)
    self.assertEqual(original, out.getvalue())
2269
def test_message_from_file(self):
    # Same check as for strings, but the message is parsed straight from
    # the (rewound) file object.
    with openfile('msg_01.txt') as fp:
        original = fp.read()
        fp.seek(0)
        parsed = email.message_from_file(fp)
    out = StringIO()
    # maxheaderlen=0: don't wrap/continue long headers since we're trying
    # to test idempotency.
    Generator(out, maxheaderlen=0).flatten(parsed)
    self.assertEqual(original, out.getvalue())
2281
2282 def test_message_from_string_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002283 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002284 with openfile('msg_01.txt') as fp:
2285 text = fp.read()
2286
2287 # Create a subclass
2288 class MyMessage(Message):
2289 pass
2290
2291 msg = email.message_from_string(text, MyMessage)
2292 unless(isinstance(msg, MyMessage))
2293 # Try something more complicated
2294 with openfile('msg_02.txt') as fp:
2295 text = fp.read()
2296 msg = email.message_from_string(text, MyMessage)
2297 for subpart in msg.walk():
2298 unless(isinstance(subpart, MyMessage))
2299
def test_message_from_file_with_class(self):
    # message_from_file() must build the whole tree out of the message
    # class it is given.

    # Create a subclass
    class MyMessage(Message):
        pass

    with openfile('msg_01.txt') as fp:
        msg = email.message_from_file(fp, MyMessage)
    self.assertIsInstance(msg, MyMessage)
    # Try something more complicated
    with openfile('msg_02.txt') as fp:
        msg = email.message_from_file(fp, MyMessage)
    for subpart in msg.walk():
        self.assertIsInstance(subpart, MyMessage)
2314
def test__all__(self):
    # The package's __all__, in sorted order, is compared with the list of
    # names expected to be public.
    module = __import__('email')
    # sorted() returns a new list, so module.__all__ itself is untouched
    # and the builtin all() is not shadowed by a local name.
    exported = sorted(module.__all__)
    self.assertEqual(exported, [
        'base64mime', 'charset', 'encoders', 'errors', 'generator',
        'header', 'iterators', 'message', 'message_from_binary_file',
        'message_from_bytes', 'message_from_file',
        'message_from_string', 'mime', 'parser',
        'quoprimime', 'utils',
        ])
2327
2328 def test_formatdate(self):
2329 now = time.time()
2330 self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
2331 time.gmtime(now)[:6])
2332
2333 def test_formatdate_localtime(self):
2334 now = time.time()
2335 self.assertEqual(
2336 utils.parsedate(utils.formatdate(now, localtime=True))[:6],
2337 time.localtime(now)[:6])
2338
2339 def test_formatdate_usegmt(self):
2340 now = time.time()
2341 self.assertEqual(
2342 utils.formatdate(now, localtime=False),
2343 time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
2344 self.assertEqual(
2345 utils.formatdate(now, localtime=False, usegmt=True),
2346 time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
2347
2348 def test_parsedate_none(self):
2349 self.assertEqual(utils.parsedate(''), None)
2350
2351 def test_parsedate_compact(self):
2352 # The FWS after the comma is optional
2353 self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
2354 utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
2355
2356 def test_parsedate_no_dayofweek(self):
2357 eq = self.assertEqual
2358 eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
2359 (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
2360
2361 def test_parsedate_compact_no_dayofweek(self):
2362 eq = self.assertEqual
2363 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
2364 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2365
R. David Murray4a62e892010-12-23 20:35:46 +00002366 def test_parsedate_no_space_before_positive_offset(self):
2367 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
2368 (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
2369
2370 def test_parsedate_no_space_before_negative_offset(self):
2371 # Issue 1155362: we already handled '+' for this case.
2372 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
2373 (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))
2374
2375
R David Murrayaccd1c02011-03-13 20:06:23 -04002376 def test_parsedate_accepts_time_with_dots(self):
2377 eq = self.assertEqual
2378 eq(utils.parsedate_tz('5 Feb 2003 13.47.26 -0800'),
2379 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2380 eq(utils.parsedate_tz('5 Feb 2003 13.47 -0800'),
2381 (2003, 2, 5, 13, 47, 0, 0, 1, -1, -28800))
2382
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002383 def test_parsedate_acceptable_to_time_functions(self):
2384 eq = self.assertEqual
2385 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
2386 t = int(time.mktime(timetup))
2387 eq(time.localtime(t)[:6], timetup[:6])
2388 eq(int(time.strftime('%Y', timetup)), 2003)
2389 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
2390 t = int(time.mktime(timetup[:9]))
2391 eq(time.localtime(t)[:6], timetup[:6])
2392 eq(int(time.strftime('%Y', timetup[:9])), 2003)
2393
R. David Murray219d1c82010-08-25 00:45:55 +00002394 def test_parsedate_y2k(self):
2395 """Test for parsing a date with a two-digit year.
2396
2397 Parsing a date with a two-digit year should return the correct
2398 four-digit year. RFC822 allows two-digit years, but RFC2822 (which
2399 obsoletes RFC822) requires four-digit years.
2400
2401 """
2402 self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
2403 utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
2404 self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
2405 utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
2406
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002407 def test_parseaddr_empty(self):
2408 self.assertEqual(utils.parseaddr('<>'), ('', ''))
2409 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
2410
2411 def test_noquote_dump(self):
2412 self.assertEqual(
2413 utils.formataddr(('A Silly Person', 'person@dom.ain')),
2414 'A Silly Person <person@dom.ain>')
2415
def test_escape_dump(self):
    """Parentheses in a display name are quoted/escaped and survive a round trip."""
    formatted = utils.formataddr(('A (Very) Silly Person', 'person@dom.ain'))
    self.assertEqual(formatted,
                     r'"A \(Very\) Silly Person" <person@dom.ain>')
    # A name that already contains backslash-escaped parentheses must come
    # back from parseaddr() exactly as it went into formataddr().
    pair = (r'A \(Special\) Person', 'person@dom.ain')
    self.assertEqual(utils.parseaddr(utils.formataddr(pair)), pair)
2423
2424 def test_escape_backslashes(self):
2425 self.assertEqual(
2426 utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
2427 r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
2428 a = r'Arthur \Backslash\ Foobar'
2429 b = 'person@dom.ain'
2430 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2431
R David Murray8debacb2011-04-06 09:35:57 -04002432 def test_quotes_unicode_names(self):
2433 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2434 name = "H\u00e4ns W\u00fcrst"
2435 addr = 'person@dom.ain'
2436 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
2437 latin1_quopri = "=?iso-8859-1?q?H=E4ns_W=FCrst?= <person@dom.ain>"
2438 self.assertEqual(utils.formataddr((name, addr)), utf8_base64)
2439 self.assertEqual(utils.formataddr((name, addr), 'iso-8859-1'),
2440 latin1_quopri)
2441
2442 def test_accepts_any_charset_like_object(self):
2443 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2444 name = "H\u00e4ns W\u00fcrst"
2445 addr = 'person@dom.ain'
2446 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
2447 foobar = "FOOBAR"
2448 class CharsetMock:
2449 def header_encode(self, string):
2450 return foobar
2451 mock = CharsetMock()
2452 mock_expected = "%s <%s>" % (foobar, addr)
2453 self.assertEqual(utils.formataddr((name, addr), mock), mock_expected)
2454 self.assertEqual(utils.formataddr((name, addr), Charset('utf-8')),
2455 utf8_base64)
2456
2457 def test_invalid_charset_like_object_raises_error(self):
2458 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2459 name = "H\u00e4ns W\u00fcrst"
2460 addr = 'person@dom.ain'
2461 # A object without a header_encode method:
2462 bad_charset = object()
2463 self.assertRaises(AttributeError, utils.formataddr, (name, addr),
2464 bad_charset)
2465
2466 def test_unicode_address_raises_error(self):
2467 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2468 addr = 'pers\u00f6n@dom.in'
2469 self.assertRaises(UnicodeError, utils.formataddr, (None, addr))
2470 self.assertRaises(UnicodeError, utils.formataddr, ("Name", addr))
2471
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002472 def test_name_with_dot(self):
2473 x = 'John X. Doe <jxd@example.com>'
2474 y = '"John X. Doe" <jxd@example.com>'
2475 a, b = ('John X. Doe', 'jxd@example.com')
2476 self.assertEqual(utils.parseaddr(x), (a, b))
2477 self.assertEqual(utils.parseaddr(y), (a, b))
2478 # formataddr() quotes the name if there's a dot in it
2479 self.assertEqual(utils.formataddr((a, b)), y)
2480
R. David Murray5397e862010-10-02 15:58:26 +00002481 def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
2482 # issue 10005. Note that in the third test the second pair of
2483 # backslashes is not actually a quoted pair because it is not inside a
2484 # comment or quoted string: the address being parsed has a quoted
2485 # string containing a quoted backslash, followed by 'example' and two
2486 # backslashes, followed by another quoted string containing a space and
2487 # the word 'example'. parseaddr copies those two backslashes
2488 # literally. Per rfc5322 this is not technically correct since a \ may
2489 # not appear in an address outside of a quoted string. It is probably
2490 # a sensible Postel interpretation, though.
2491 eq = self.assertEqual
2492 eq(utils.parseaddr('""example" example"@example.com'),
2493 ('', '""example" example"@example.com'))
2494 eq(utils.parseaddr('"\\"example\\" example"@example.com'),
2495 ('', '"\\"example\\" example"@example.com'))
2496 eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
2497 ('', '"\\\\"example\\\\" example"@example.com'))
2498
R. David Murray63563cd2010-12-18 18:25:38 +00002499 def test_parseaddr_preserves_spaces_in_local_part(self):
2500 # issue 9286. A normal RFC5322 local part should not contain any
2501 # folding white space, but legacy local parts can (they are a sequence
2502 # of atoms, not dotatoms). On the other hand we strip whitespace from
2503 # before the @ and around dots, on the assumption that the whitespace
2504 # around the punctuation is a mistake in what would otherwise be
2505 # an RFC5322 local part. Leading whitespace is, usual, stripped as well.
2506 self.assertEqual(('', "merwok wok@xample.com"),
2507 utils.parseaddr("merwok wok@xample.com"))
2508 self.assertEqual(('', "merwok wok@xample.com"),
2509 utils.parseaddr("merwok wok@xample.com"))
2510 self.assertEqual(('', "merwok wok@xample.com"),
2511 utils.parseaddr(" merwok wok @xample.com"))
2512 self.assertEqual(('', 'merwok"wok" wok@xample.com'),
2513 utils.parseaddr('merwok"wok" wok@xample.com'))
2514 self.assertEqual(('', 'merwok.wok.wok@xample.com'),
2515 utils.parseaddr('merwok. wok . wok@xample.com'))
2516
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002517 def test_multiline_from_comment(self):
2518 x = """\
2519Foo
2520\tBar <foo@example.com>"""
2521 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
2522
2523 def test_quote_dump(self):
2524 self.assertEqual(
2525 utils.formataddr(('A Silly; Person', 'person@dom.ain')),
2526 r'"A Silly; Person" <person@dom.ain>')
2527
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002528 def test_charset_richcomparisons(self):
2529 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002530 ne = self.assertNotEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002531 cset1 = Charset()
2532 cset2 = Charset()
2533 eq(cset1, 'us-ascii')
2534 eq(cset1, 'US-ASCII')
2535 eq(cset1, 'Us-AsCiI')
2536 eq('us-ascii', cset1)
2537 eq('US-ASCII', cset1)
2538 eq('Us-AsCiI', cset1)
2539 ne(cset1, 'usascii')
2540 ne(cset1, 'USASCII')
2541 ne(cset1, 'UsAsCiI')
2542 ne('usascii', cset1)
2543 ne('USASCII', cset1)
2544 ne('UsAsCiI', cset1)
2545 eq(cset1, cset2)
2546 eq(cset2, cset1)
2547
2548 def test_getaddresses(self):
2549 eq = self.assertEqual
2550 eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
2551 'Bud Person <bperson@dom.ain>']),
2552 [('Al Person', 'aperson@dom.ain'),
2553 ('Bud Person', 'bperson@dom.ain')])
2554
def test_getaddresses_nasty(self):
    """getaddresses() returns placeholder pairs for group syntax and junk input."""
    cases = (
        (['foo: ;'],
         [('', '')]),
        (['[]*-- =~$'],
         [('', ''), ('', ''), ('', '*--')]),
        (['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>'],
         [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]),
    )
    for header_values, expected in cases:
        self.assertEqual(utils.getaddresses(header_values), expected)
2564
2565 def test_getaddresses_embedded_comment(self):
2566 """Test proper handling of a nested comment"""
2567 eq = self.assertEqual
2568 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
2569 eq(addrs[0][1], 'foo@bar.com')
2570
2571 def test_utils_quote_unquote(self):
2572 eq = self.assertEqual
2573 msg = Message()
2574 msg.add_header('content-disposition', 'attachment',
2575 filename='foo\\wacky"name')
2576 eq(msg.get_filename(), 'foo\\wacky"name')
2577
2578 def test_get_body_encoding_with_bogus_charset(self):
2579 charset = Charset('not a charset')
2580 self.assertEqual(charset.get_body_encoding(), 'base64')
2581
2582 def test_get_body_encoding_with_uppercase_charset(self):
2583 eq = self.assertEqual
2584 msg = Message()
2585 msg['Content-Type'] = 'text/plain; charset=UTF-8'
2586 eq(msg['content-type'], 'text/plain; charset=UTF-8')
2587 charsets = msg.get_charsets()
2588 eq(len(charsets), 1)
2589 eq(charsets[0], 'utf-8')
2590 charset = Charset(charsets[0])
2591 eq(charset.get_body_encoding(), 'base64')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00002592 msg.set_payload(b'hello world', charset=charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002593 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
2594 eq(msg.get_payload(decode=True), b'hello world')
2595 eq(msg['content-transfer-encoding'], 'base64')
2596 # Try another one
2597 msg = Message()
2598 msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
2599 charsets = msg.get_charsets()
2600 eq(len(charsets), 1)
2601 eq(charsets[0], 'us-ascii')
2602 charset = Charset(charsets[0])
2603 eq(charset.get_body_encoding(), encoders.encode_7or8bit)
2604 msg.set_payload('hello world', charset=charset)
2605 eq(msg.get_payload(), 'hello world')
2606 eq(msg['content-transfer-encoding'], '7bit')
2607
2608 def test_charsets_case_insensitive(self):
2609 lc = Charset('us-ascii')
2610 uc = Charset('US-ASCII')
2611 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
2612
def test_partial_falls_inside_message_delivery_status(self):
    """Parsing msg_43.txt yields the expected multipart/report structure."""
    # The Parser interface provides chunks of data to FeedParser in 8192
    # byte gulps.  SF bug #1076485 found one of those chunks inside
    # message/delivery-status header block, which triggered an
    # unreadline() of NeedMoreData.
    msg = self._msgobj('msg_43.txt')
    sfp = StringIO()
    iterators._structure(msg, sfp)
    # The expected listing is assembled with its nesting indentation
    # spelled out, so the significant whitespace cannot be lost to
    # reformatting of a multi-line literal.
    # NOTE(review): four spaces per nesting level is assumed from
    # email.iterators._structure -- confirm against the library under test.
    level1 = ' ' * 4
    level2 = ' ' * 8
    expected = (
        'multipart/report\n'
        + level1 + 'text/plain\n'
        + level1 + 'message/delivery-status\n'
        # 26 text/plain parts nested under message/delivery-status.
        + (level2 + 'text/plain\n') * 26
        + level1 + 'text/rfc822-headers\n')
    self.ndiffAssertEqual(sfp.getvalue(), expected)
2654
R. David Murraya0b44b52010-12-02 21:47:19 +00002655 def test_make_msgid_domain(self):
2656 self.assertEqual(
2657 email.utils.make_msgid(domain='testdomain-string')[-19:],
2658 '@testdomain-string>')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002659
Ezio Melottib3aedd42010-11-20 19:04:17 +00002660
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002661# Test the iterator/generators
class TestIterators(TestEmailBase):
    """Tests for the email.iterators helpers and FeedParser line buffering."""

    def test_body_line_iterator(self):
        """body_line_iterator() yields the body lines of simple and multipart messages."""
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # First a simple non-multipart message
        msg = self._msgobj('msg_01.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 6)
        neq(EMPTYSTRING.join(lines), msg.get_payload())
        # Now a more complicated multipart
        msg = self._msgobj('msg_02.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 43)
        with openfile('msg_19.txt') as fp:
            neq(EMPTYSTRING.join(lines), fp.read())

    def test_typed_subpart_iterator(self):
        """typed_subpart_iterator(msg, 'text') visits both text parts of msg_04.txt."""
        eq = self.assertEqual
        msg = self._msgobj('msg_04.txt')
        payloads = [subpart.get_payload()
                    for subpart in iterators.typed_subpart_iterator(msg, 'text')]
        eq(len(payloads), 2)
        eq(EMPTYSTRING.join(payloads), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        """typed_subpart_iterator(msg, 'text', 'plain') finds the single part of msg_03.txt."""
        eq = self.assertEqual
        msg = self._msgobj('msg_03.txt')
        payloads = [
            subpart.get_payload()
            for subpart in iterators.typed_subpart_iterator(msg, 'text', 'plain')]
        eq(len(payloads), 1)
        eq(EMPTYSTRING.join(payloads), """\

Hi,

Do you like this message?

-Me
""")

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
        line endings. A CR ending one push() followed by a LF starting
        the next push() added an empty line.
        '''
        # Each entry is (chunk to push, number of complete lines expected
        # to become readable after that push).
        imt = [
            ("a\r \n", 2),
            ("b", 0),
            ("c\n", 1),
            ("", 0),
            ("d\r\n", 1),
            ("e\r", 0),
            ("\nf", 1),
            ("\r\n", 1),
            ]
        from email.feedparser import BufferedSubFile, NeedMoreData
        bsf = BufferedSubFile()
        om = []
        nt = 0
        for il, n in imt:
            bsf.push(il)
            nt += n
            n1 = 0
            while True:
                ol = bsf.readline()
                # NeedMoreData is a sentinel, so test identity, not equality.
                if ol is NeedMoreData:
                    break
                om.append(ol)
                n1 += 1
            # assertEqual reports both values on failure, unlike assertTrue.
            self.assertEqual(n, n1)
        self.assertEqual(len(om), nt)
        # Nothing may be lost or invented: the output is the input, re-split.
        self.assertEqual(''.join([il for il, n in imt]), ''.join(om))
2748
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002749
Ezio Melottib3aedd42010-11-20 19:04:17 +00002750
class TestParsers(TestEmailBase):
    """Parser, HeaderParser and message_from_string behaviour on varied input."""

    # Do not truncate diffs when a comparison of long strings fails.
    maxDiff = None

    def test_header_parser(self):
        """HeaderParser reads the headers and leaves the body as one string."""
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt') as fp:
            msg = HeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)

    def test_whitespace_continuation(self):
        """A whitespace-only line after a header is a continuation of it."""
        eq = self.assertEqual
        # This message contains a line after the Subject: header that has only
        # whitespace, but it is not empty!
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: the next line has a space on it
\x20
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        """As above, with the continued header last in the header block."""
        eq = self.assertEqual
        # Like the previous test, but the subject line is the last
        # header.
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam
Subject: the next line has a space on it
\x20

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        """A CRLF-delimited multipart message is split into its two parts."""
        eq = self.assertEqual
        with openfile('msg_26.txt', newline='\n') as fp:
            msg = Parser().parse(fp)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'text/plain')
        eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'application/riscos')

    def test_crlf_flatten(self):
        """Flattening with linesep='\\r\\n' reproduces the CRLF input text."""
        # Using newline='\n' preserves the crlfs in this input file.
        with openfile('msg_26.txt', newline='\n') as fp:
            text = fp.read()
        msg = email.message_from_string(text)
        s = StringIO()
        g = Generator(s)
        g.flatten(msg, linesep='\r\n')
        self.assertEqual(s.getvalue(), text)

    def test_multipart_digest_with_extra_mime_headers(self):
        """Each part of the multipart/digest in msg_28.txt is a message/rfc822 wrapper."""
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'message/rfc822')
        self.assertTrue(part1.is_multipart())
        eq(len(part1.get_payload()), 1)
        part1a = part1.get_payload(0)
        self.assertFalse(part1a.is_multipart())
        eq(part1a.get_content_type(), 'text/plain')
        neq(part1a.get_payload(), 'message 1\n')
        # next message/rfc822
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'message/rfc822')
        self.assertTrue(part2.is_multipart())
        eq(len(part2.get_payload()), 1)
        part2a = part2.get_payload(0)
        self.assertFalse(part2a.is_multipart())
        eq(part2a.get_content_type(), 'text/plain')
        neq(part2a.get_payload(), 'message 2\n')

    def test_three_lines(self):
        """A header-only message with no trailing newline keeps its last header."""
        # A bug report by Andrew McNamara
        lines = ['From: Andrew Person <aperson@dom.ain',
                 'Subject: Test',
                 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
        msg = email.message_from_string(NL.join(lines))
        self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        """Header values do not retain the CRLF that terminates their line."""
        eq = self.assertEqual
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        value1 = 'text'
        value2 = 'more text'
        m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            value1, value2)
        msg = email.message_from_string(m)
        eq(msg.get('Header'), value1)
        eq(msg.get('Next-Header'), value2)

    def test_rfc2822_header_syntax(self):
        """Header names containing punctuation are accepted as header names."""
        eq = self.assertEqual
        m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg), 3)
        eq(sorted(msg), ['!"#QUX;~', '>From', 'From'])
        eq(msg.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        """A first line with a space before any colon yields no headers at all."""
        eq = self.assertEqual
        m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg.keys()), 0)

    def test_rfc2822_one_character_header(self):
        """Single-character header names are parsed like any other."""
        eq = self.assertEqual
        m = 'A: first header\nB: second header\nCC: third header\n\nbody'
        msg = email.message_from_string(m)
        # sorted() avoids depending on keys() returning a mutable list.
        headers = sorted(msg.keys())
        eq(headers, ['A', 'B', 'CC'])
        eq(msg.get_payload(), 'body')

    def test_CRLFLF_at_end_of_part(self):
        """A part body ending in CRLF before a bare LF keeps its CRLF."""
        # issue 5610: feedparser should not eat two chars from body part ending
        # with "\r\n\n".
        m = (
            "From: foo@bar.com\n"
            "To: baz\n"
            "Mime-Version: 1.0\n"
            "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
            "\n"
            "--BOUNDARY\n"
            "Content-Type: text/plain\n"
            "\n"
            "body ending with CRLF newline\r\n"
            "\n"
            "--BOUNDARY--\n"
          )
        msg = email.message_from_string(m)
        self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002913
Ezio Melottib3aedd42010-11-20 19:04:17 +00002914
R. David Murray96fd54e2010-10-08 15:55:28 +00002915class Test8BitBytesHandling(unittest.TestCase):
2916 # In Python3 all input is string, but that doesn't work if the actual input
2917 # uses an 8bit transfer encoding. To hack around that, in email 5.1 we
2918 # decode byte streams using the surrogateescape error handler, and
2919 # reconvert to binary at appropriate places if we detect surrogates. This
2920 # doesn't allow us to transform headers with 8bit bytes (they get munged),
2921 # but it does allow us to parse and preserve them, and to decode body
2922 # parts that use an 8bit CTE.
2923
# Template for the single-part messages used by the body tests; the
# {charset}, {cte} and {bodyline} fields are filled in with str.format().
bodytest_msg = '\n'.join((
    'From: foo@bar.com',
    'To: baz',
    'Mime-Version: 1.0',
    'Content-Type: text/plain; charset={charset}',
    'Content-Transfer-Encoding: {cte}',
    '',
    '{bodyline}',
    '',
))
2933
2934 def test_known_8bit_CTE(self):
2935 m = self.bodytest_msg.format(charset='utf-8',
2936 cte='8bit',
2937 bodyline='pöstal').encode('utf-8')
2938 msg = email.message_from_bytes(m)
2939 self.assertEqual(msg.get_payload(), "pöstal\n")
2940 self.assertEqual(msg.get_payload(decode=True),
2941 "pöstal\n".encode('utf-8'))
2942
2943 def test_unknown_8bit_CTE(self):
2944 m = self.bodytest_msg.format(charset='notavalidcharset',
2945 cte='8bit',
2946 bodyline='pöstal').encode('utf-8')
2947 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00002948 self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n")
R. David Murray96fd54e2010-10-08 15:55:28 +00002949 self.assertEqual(msg.get_payload(decode=True),
2950 "pöstal\n".encode('utf-8'))
2951
2952 def test_8bit_in_quopri_body(self):
2953 # This is non-RFC compliant data...without 'decode' the library code
2954 # decodes the body using the charset from the headers, and because the
2955 # source byte really is utf-8 this works. This is likely to fail
2956 # against real dirty data (ie: produce mojibake), but the data is
2957 # invalid anyway so it is as good a guess as any. But this means that
2958 # this test just confirms the current behavior; that behavior is not
2959 # necessarily the best possible behavior. With 'decode' it is
2960 # returning the raw bytes, so that test should be of correct behavior,
2961 # or at least produce the same result that email4 did.
2962 m = self.bodytest_msg.format(charset='utf-8',
2963 cte='quoted-printable',
2964 bodyline='p=C3=B6stál').encode('utf-8')
2965 msg = email.message_from_bytes(m)
2966 self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
2967 self.assertEqual(msg.get_payload(decode=True),
2968 'pöstál\n'.encode('utf-8'))
2969
2970 def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
2971 # This is similar to the previous test, but proves that if the 8bit
2972 # byte is undecodeable in the specified charset, it gets replaced
2973 # by the unicode 'unknown' character. Again, this may or may not
2974 # be the ideal behavior. Note that if decode=False none of the
2975 # decoders will get involved, so this is the only test we need
2976 # for this behavior.
2977 m = self.bodytest_msg.format(charset='ascii',
2978 cte='quoted-printable',
2979 bodyline='p=C3=B6stál').encode('utf-8')
2980 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00002981 self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
R. David Murray96fd54e2010-10-08 15:55:28 +00002982 self.assertEqual(msg.get_payload(decode=True),
2983 'pöstál\n'.encode('utf-8'))
2984
def test_8bit_in_base64_body(self):
    """A base64 body containing an 8-bit byte is returned undecoded, as bytes."""
    # Sticking an 8bit byte in a base64 block makes it undecodable by
    # normal means, so the block is returned undecoded, but as bytes.
    text = self.bodytest_msg.format(charset='utf-8',
                                    cte='base64',
                                    bodyline='cMO2c3RhbAá=')
    msg = email.message_from_bytes(text.encode('utf-8'))
    undecoded = 'cMO2c3RhbAá=\n'.encode('utf-8')
    self.assertEqual(msg.get_payload(decode=True), undecoded)
2994
2995 def test_8bit_in_uuencode_body(self):
2996 # Sticking an 8bit byte in a uuencode block makes it undecodable by
2997 # normal means, so the block is returned undecoded, but as bytes.
2998 m = self.bodytest_msg.format(charset='utf-8',
2999 cte='uuencode',
3000 bodyline='<,.V<W1A; á ').encode('utf-8')
3001 msg = email.message_from_bytes(m)
3002 self.assertEqual(msg.get_payload(decode=True),
3003 '<,.V<W1A; á \n'.encode('utf-8'))
3004
3005
# Pairs of (raw header source line, (header name, expected printed value)).
headertest_headers = (
    ('From: foo@bar.com',
     ('From', 'foo@bar.com')),
    ('To: báz',
     ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
    ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
     '\tJean de Baddie',
     ('Subject',
      '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
      'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
      ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
    ('From: göst',
     ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
)
# The raw header lines, followed by one line of body text, as utf-8 bytes.
headertest_msg = (
    '\n'.join([raw_line for (raw_line, _) in headertest_headers])
    + '\nYes, they are flying.\n'
).encode('utf-8')
R. David Murray96fd54e2010-10-08 15:55:28 +00003018
3019 def test_get_8bit_header(self):
3020 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003021 self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz')
3022 self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz')
R. David Murray96fd54e2010-10-08 15:55:28 +00003023
3024 def test_print_8bit_headers(self):
3025 msg = email.message_from_bytes(self.headertest_msg)
3026 self.assertEqual(str(msg),
R. David Murray92532142011-01-07 23:25:30 +00003027 textwrap.dedent("""\
3028 From: {}
3029 To: {}
3030 Subject: {}
3031 From: {}
3032
3033 Yes, they are flying.
3034 """).format(*[expected[1] for (_, expected) in
3035 self.headertest_headers]))
R. David Murray96fd54e2010-10-08 15:55:28 +00003036
3037 def test_values_with_8bit_headers(self):
3038 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003039 self.assertListEqual([str(x) for x in msg.values()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003040 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003041 'b\uFFFD\uFFFDz',
3042 'Maintenant je vous pr\uFFFD\uFFFDsente mon '
3043 'coll\uFFFD\uFFFDgue, le pouf '
3044 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
R. David Murray96fd54e2010-10-08 15:55:28 +00003045 '\tJean de Baddie',
R. David Murray92532142011-01-07 23:25:30 +00003046 "g\uFFFD\uFFFDst"])
R. David Murray96fd54e2010-10-08 15:55:28 +00003047
3048 def test_items_with_8bit_headers(self):
3049 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003050 self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003051 [('From', 'foo@bar.com'),
R. David Murray92532142011-01-07 23:25:30 +00003052 ('To', 'b\uFFFD\uFFFDz'),
3053 ('Subject', 'Maintenant je vous '
3054 'pr\uFFFD\uFFFDsente '
3055 'mon coll\uFFFD\uFFFDgue, le pouf '
3056 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
3057 '\tJean de Baddie'),
3058 ('From', 'g\uFFFD\uFFFDst')])
R. David Murray96fd54e2010-10-08 15:55:28 +00003059
3060 def test_get_all_with_8bit_headers(self):
3061 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003062 self.assertListEqual([str(x) for x in msg.get_all('from')],
R. David Murray96fd54e2010-10-08 15:55:28 +00003063 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003064 'g\uFFFD\uFFFDst'])
R. David Murray96fd54e2010-10-08 15:55:28 +00003065
R David Murraya2150232011-03-16 21:11:23 -04003066 def test_get_content_type_with_8bit(self):
3067 msg = email.message_from_bytes(textwrap.dedent("""\
3068 Content-Type: text/pl\xA7in; charset=utf-8
3069 """).encode('latin-1'))
3070 self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin")
3071 self.assertEqual(msg.get_content_maintype(), "text")
3072 self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin")
3073
3074 def test_get_params_with_8bit(self):
3075 msg = email.message_from_bytes(
3076 'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1'))
3077 self.assertEqual(msg.get_params(header='x-header'),
3078 [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])
3079 self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne')
3080 # XXX: someday you might be able to get 'b\xa7r', for now you can't.
3081 self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None)
3082
3083 def test_get_rfc2231_params_with_8bit(self):
3084 msg = email.message_from_bytes(textwrap.dedent("""\
3085 Content-Type: text/plain; charset=us-ascii;
3086 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3087 ).encode('latin-1'))
3088 self.assertEqual(msg.get_param('title'),
3089 ('us-ascii', 'en', 'This is not f\uFFFDn'))
3090
3091 def test_set_rfc2231_params_with_8bit(self):
3092 msg = email.message_from_bytes(textwrap.dedent("""\
3093 Content-Type: text/plain; charset=us-ascii;
3094 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3095 ).encode('latin-1'))
3096 msg.set_param('title', 'test')
3097 self.assertEqual(msg.get_param('title'), 'test')
3098
3099 def test_del_rfc2231_params_with_8bit(self):
3100 msg = email.message_from_bytes(textwrap.dedent("""\
3101 Content-Type: text/plain; charset=us-ascii;
3102 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3103 ).encode('latin-1'))
3104 msg.del_param('title')
3105 self.assertEqual(msg.get_param('title'), None)
3106 self.assertEqual(msg.get_content_maintype(), 'text')
3107
3108 def test_get_payload_with_8bit_cte_header(self):
3109 msg = email.message_from_bytes(textwrap.dedent("""\
3110 Content-Transfer-Encoding: b\xa7se64
3111 Content-Type: text/plain; charset=latin-1
3112
3113 payload
3114 """).encode('latin-1'))
3115 self.assertEqual(msg.get_payload(), 'payload\n')
3116 self.assertEqual(msg.get_payload(decode=True), b'payload\n')
3117
# utf-8 encoded message with non-ASCII headers, a tab-folded Subject and
# an 8bit Cyrillic body.
non_latin_bin_msg = '\n'.join((
    'From: foo@bar.com',
    'To: báz',
    'Subject: Maintenant je vous présente mon collègue, le pouf célèbre',
    '\tJean de Baddie',
    'Mime-Version: 1.0',
    'Content-Type: text/plain; charset="utf-8"',
    'Content-Transfer-Encoding: 8bit',
    '',
    'Да, они летят.',
    '',
)).encode('utf-8')
3129
3130 def test_bytes_generator(self):
3131 msg = email.message_from_bytes(self.non_latin_bin_msg)
3132 out = BytesIO()
3133 email.generator.BytesGenerator(out).flatten(msg)
3134 self.assertEqual(out.getvalue(), self.non_latin_bin_msg)
3135
R. David Murray7372a072011-01-26 21:21:32 +00003136 def test_bytes_generator_handles_None_body(self):
3137 #Issue 11019
3138 msg = email.message.Message()
3139 out = BytesIO()
3140 email.generator.BytesGenerator(out).flatten(msg)
3141 self.assertEqual(out.getvalue(), b"\n")
3142
# 7-bit rendering of the non-latin test message, with the Subject folded
# over three lines.  Each continuation line begins with a single space;
# that space is what marks the line as part of the Subject header, so the
# text is built from explicit pieces instead of a dedented multi-line
# literal whose indentation would hide it.
non_latin_bin_msg_as7bit_wrapped = (
    'From: foo@bar.com\n'
    'To: =?unknown-8bit?q?b=C3=A1z?=\n'
    'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=\n'
    ' =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
    ' =?unknown-8bit?q?_Jean_de_Baddie?=\n'
    'Mime-Version: 1.0\n'
    'Content-Type: text/plain; charset="utf-8"\n'
    'Content-Transfer-Encoding: base64\n'
    '\n'
    '0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==\n'
)
3155
3156 def test_generator_handles_8bit(self):
3157 msg = email.message_from_bytes(self.non_latin_bin_msg)
3158 out = StringIO()
3159 email.generator.Generator(out).flatten(msg)
R. David Murray92532142011-01-07 23:25:30 +00003160 self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped)
R. David Murray96fd54e2010-10-08 15:55:28 +00003161
3162 def test_bytes_generator_with_unix_from(self):
3163 # The unixfrom contains a current date, so we can't check it
3164 # literally. Just make sure the first word is 'From' and the
3165 # rest of the message matches the input.
3166 msg = email.message_from_bytes(self.non_latin_bin_msg)
3167 out = BytesIO()
3168 email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
3169 lines = out.getvalue().split(b'\n')
3170 self.assertEqual(lines[0].split()[0], b'From')
3171 self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)
3172
# Same as non_latin_bin_msg_as7bit_wrapped, except that the Subject line
# and its first continuation line are merged into one unfolded line.
_wrapped_lines = non_latin_bin_msg_as7bit_wrapped.split('\n')
non_latin_bin_msg_as7bit = '\n'.join(
    _wrapped_lines[:2]
    + ['Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
       'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
    + _wrapped_lines[4:])
# Do not leave the scratch list behind as a class attribute.
del _wrapped_lines
3178
def test_message_from_binary_file(self):
    """BytesParser.parse() must read an 8bit message from a binary file."""
    path = 'test.msg'
    # Register removal first so the file goes away even if a step fails.
    self.addCleanup(unlink, path)
    with open(path, 'wb') as sink:
        sink.write(self.non_latin_bin_msg)
    with open(path, 'rb') as source:
        parsed = email.parser.BytesParser().parse(source)
    self.assertEqual(str(parsed), self.non_latin_bin_msg_as7bit)
3187
# Raw latin-1 message with an 8bit body.
latin_bin_msg = '\n'.join([
    'From: foo@bar.com',
    'To: Dinsdale',
    'Subject: Nudge nudge, wink, wink',
    'Mime-Version: 1.0',
    'Content-Type: text/plain; charset="latin-1"',
    'Content-Transfer-Encoding: 8bit',
    '',
    'oh là là, know what I mean, know what I mean?',
    '',
]).encode('latin-1')
3198
# Expected 7bit rendering of latin_bin_msg: the charset is reported as
# iso-8859-1 and the body is quoted-printable encoded.
latin_bin_msg_as7bit = '\n'.join([
    'From: foo@bar.com',
    'To: Dinsdale',
    'Subject: Nudge nudge, wink, wink',
    'Mime-Version: 1.0',
    'Content-Type: text/plain; charset="iso-8859-1"',
    'Content-Transfer-Encoding: quoted-printable',
    '',
    'oh l=E0 l=E0, know what I mean, know what I mean?',
    '',
])
3209
3210 def test_string_generator_reencodes_to_quopri_when_appropriate(self):
3211 m = email.message_from_bytes(self.latin_bin_msg)
3212 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3213
3214 def test_decoded_generator_emits_unicode_body(self):
3215 m = email.message_from_bytes(self.latin_bin_msg)
3216 out = StringIO()
3217 email.generator.DecodedGenerator(out).flatten(m)
3218 #DecodedHeader output contains an extra blank line compared
3219 #to the input message. RDM: not sure if this is a bug or not,
3220 #but it is not specific to the 8bit->7bit conversion.
3221 self.assertEqual(out.getvalue(),
3222 self.latin_bin_msg.decode('latin-1')+'\n')
3223
3224 def test_bytes_feedparser(self):
3225 bfp = email.feedparser.BytesFeedParser()
3226 for i in range(0, len(self.latin_bin_msg), 10):
3227 bfp.feed(self.latin_bin_msg[i:i+10])
3228 m = bfp.close()
3229 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3230
def test_crlf_flatten(self):
    """Flattening msg_26.txt with linesep='\\r\\n' must reproduce the file."""
    with openfile('msg_26.txt', 'rb') as fp:
        original = fp.read()
    parsed = email.message_from_bytes(original)
    sink = BytesIO()
    generator = email.generator.BytesGenerator(sink)
    generator.flatten(parsed, linesep='\r\n')
    self.assertEqual(sink.getvalue(), original)
R David Murrayc5c14722011-04-06 08:13:02 -04003239
3240 def test_8bit_multipart(self):
3241 # Issue 11605
3242 source = textwrap.dedent("""\
3243 Date: Fri, 18 Mar 2011 17:15:43 +0100
3244 To: foo@example.com
3245 From: foodwatch-Newsletter <bar@example.com>
3246 Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System
3247 Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain>
3248 MIME-Version: 1.0
3249 Content-Type: multipart/alternative;
3250 boundary="b1_76a486bee62b0d200f33dc2ca08220ad"
3251
3252 --b1_76a486bee62b0d200f33dc2ca08220ad
3253 Content-Type: text/plain; charset="utf-8"
3254 Content-Transfer-Encoding: 8bit
3255
3256 Guten Tag, ,
3257
3258 mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die
3259 Nachrichten aus Japan.
3260
3261
3262 --b1_76a486bee62b0d200f33dc2ca08220ad
3263 Content-Type: text/html; charset="utf-8"
3264 Content-Transfer-Encoding: 8bit
3265
3266 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
3267 "http://www.w3.org/TR/html4/loose.dtd">
3268 <html lang="de">
3269 <head>
3270 <title>foodwatch - Newsletter</title>
3271 </head>
3272 <body>
3273 <p>mit gro&szlig;er Betroffenheit verfolgen auch wir im foodwatch-Team
3274 die Nachrichten aus Japan.</p>
3275 </body>
3276 </html>
3277 --b1_76a486bee62b0d200f33dc2ca08220ad--
3278
3279 """).encode('utf-8')
3280 msg = email.message_from_bytes(source)
3281 s = BytesIO()
3282 g = email.generator.BytesGenerator(s)
3283 g.flatten(msg)
3284 self.assertEqual(s.getvalue(), source)
3285
R. David Murray8451c4b2010-10-23 22:19:56 +00003286 maxDiff = None
3287
Ezio Melottib3aedd42010-11-20 19:04:17 +00003288
class BaseTestBytesGeneratorIdempotent:
    """Mixin that runs idempotency checks through BytesGenerator.

    Subclasses supply ``linesep`` (str passed to flatten()), ``blinesep``
    (the bytes substituted into the test data) and
    ``normalize_linesep_regex`` (a compiled bytes pattern matching the line
    endings to be replaced by ``blinesep``).
    """

    maxDiff = None

    def _msgobj(self, filename):
        """Return (message, data) for *filename* after line-end normalization."""
        with openfile(filename, 'rb') as fp:
            raw = fp.read()
        normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(normalized), normalized

    def _idempotent(self, msg, data, unixfrom=False):
        """Assert that flattening *msg* reproduces *data* exactly."""
        sink = BytesIO()
        # maxheaderlen=0 disables header re-wrapping.
        generator = email.generator.BytesGenerator(sink, maxheaderlen=0)
        generator.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertEqual(data, sink.getvalue())
R. David Murray96fd54e2010-10-08 15:55:28 +00003305
3306
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                     TestIdempotent):
    """Run the idempotency tests through BytesGenerator with LF line ends."""

    # CRLF pairs in the test data are rewritten to bare LF.
    normalize_linesep_regex = re.compile(br'\r\n')
    blinesep = b'\n'
    linesep = '\n'
3312
3313
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    """Run the idempotency tests through BytesGenerator with CRLF line ends."""

    # Only LFs not already preceded by CR are rewritten to CRLF.
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
    blinesep = b'\r\n'
    linesep = '\r\n'
3319
Ezio Melottib3aedd42010-11-20 19:04:17 +00003320
class TestBase64(unittest.TestCase):
    """Tests for the base64 helpers in email.base64mime."""

    def test_len(self):
        """header_length() reports the size of the base64-encoded text."""
        eq = self.assertEqual
        eq(base64mime.header_length('hello'),
           len(base64mime.body_encode(b'hello', eol='')))
        for size in range(15):
            # Base64 emits four characters for every (possibly partial)
            # group of three input bytes.
            expected = 4 * ((size + 2) // 3)
            eq(base64mime.header_length('x' * size), expected)

    def test_decode(self):
        """decode() turns base64 text back into bytes."""
        eq = self.assertEqual
        eq(base64mime.decode(''), b'')
        eq(base64mime.decode('aGVsbG8='), b'hello')

    def test_encode(self):
        """body_encode() honours its maxlinelen and eol arguments."""
        eq = self.assertEqual
        eq(base64mime.body_encode(b''), b'')
        eq(base64mime.body_encode(b'hello'), 'aGVsbG8=\n')
        # A newline in the input is encoded like any other byte.
        eq(base64mime.body_encode(b'hello\n'), 'aGVsbG8K\n')
        full_line = 'eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg'
        last_line = 'eHh4eCB4eHh4IA=='
        # Test the maxlinelen arg
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40),
           (full_line + '\n') * 3 + last_line + '\n')
        # Test the eol argument
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           (full_line + '\r\n') * 3 + last_line + '\r\n')

    def test_header_encode(self):
        """header_encode() wraps the base64 text in RFC 2047 chrome."""
        eq = self.assertEqual
        he = base64mime.header_encode
        eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        # Line breaks in the input are encoded, not preserved.
        eq(he('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # Test the charset option
        eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003371
3372
Ezio Melottib3aedd42010-11-20 19:04:17 +00003373
class TestQuopri(unittest.TestCase):
    """Tests for the quoted-printable helpers in email.quoprimime."""

    def setUp(self):
        # Byte values that don't need to be encoded in headers.
        self.hlit = list(chain(
            range(ord('a'), ord('z') + 1),
            range(ord('A'), ord('Z') + 1),
            range(ord('0'), ord('9') + 1),
            (c for c in b'!*+-/')))
        # Byte values that do need to be encoded in headers.
        self.hnon = [c for c in range(256) if c not in self.hlit]
        # Use a TestCase assertion so the sanity check survives ``python -O``.
        self.assertEqual(len(self.hlit) + len(self.hnon), 256)
        # Byte values that don't need to be encoded in bodies.
        self.blit = list(range(ord(' '), ord('~') + 1))
        self.blit.append(ord('\t'))
        self.blit.remove(ord('='))
        # Byte values that do need to be encoded in bodies.
        self.bnon = [c for c in range(256) if c not in self.blit]
        self.assertEqual(len(self.blit) + len(self.bnon), 256)

    def test_quopri_header_check(self):
        for c in self.hlit:
            self.assertFalse(quoprimime.header_check(c),
                        'Should not be header quopri encoded: %s' % chr(c))
        for c in self.hnon:
            self.assertTrue(quoprimime.header_check(c),
                            'Should be header quopri encoded: %s' % chr(c))

    def test_quopri_body_check(self):
        for c in self.blit:
            self.assertFalse(quoprimime.body_check(c),
                        'Should not be body quopri encoded: %s' % chr(c))
        for c in self.bnon:
            self.assertTrue(quoprimime.body_check(c),
                            'Should be body quopri encoded: %s' % chr(c))

    def test_header_quopri_len(self):
        eq = self.assertEqual
        eq(quoprimime.header_length(b'hello'), 5)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
           quoprimime.header_length(b'hello') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
           quoprimime.header_length(b'h@e@l@l@o@') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        for c in self.hlit:
            eq(quoprimime.header_length(bytes([c])), 1,
               'expected length 1 for %r' % chr(c))
        for c in self.hnon:
            # Space is special; it's encoded to _
            if c == ord(' '):
                continue
            eq(quoprimime.header_length(bytes([c])), 3,
               'expected length 3 for %r' % chr(c))
        eq(quoprimime.header_length(b' '), 1)

    def test_body_quopri_len(self):
        eq = self.assertEqual
        for c in self.blit:
            eq(quoprimime.body_length(bytes([c])), 1)
        for c in self.bnon:
            eq(quoprimime.body_length(bytes([c])), 3)

    def test_quote_unquote_idempotent(self):
        # quote() followed by unquote() must give back the original character.
        for x in range(256):
            c = chr(x)
            self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)

    def _test_header_encode(self, header, expected_encoded_header, charset=None):
        """Check header_encode(); charset=None exercises its default charset."""
        args = (header,) if charset is None else (header, charset)
        encoded_header = quoprimime.header_encode(*args)
        self.assertEqual(encoded_header, expected_encoded_header)

    def test_header_encode_null(self):
        self._test_header_encode(b'', '')

    def test_header_encode_one_word(self):
        self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=')

    def test_header_encode_two_lines(self):
        self._test_header_encode(b'hello\nworld',
                                '=?iso-8859-1?q?hello=0Aworld?=')

    def test_header_encode_non_ascii(self):
        self._test_header_encode(b'hello\xc7there',
                                '=?iso-8859-1?q?hello=C7there?=')

    def test_header_encode_alt_charset(self):
        self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
                charset='iso-8859-2')

    def _test_header_decode(self, encoded_header, expected_decoded_header):
        """Check that header_decode() yields the expected text."""
        decoded_header = quoprimime.header_decode(encoded_header)
        self.assertEqual(decoded_header, expected_decoded_header)

    def test_header_decode_null(self):
        self._test_header_decode('', '')

    def test_header_decode_one_word(self):
        self._test_header_decode('hello', 'hello')

    def test_header_decode_two_lines(self):
        self._test_header_decode('hello=0Aworld', 'hello\nworld')

    def test_header_decode_non_ascii(self):
        self._test_header_decode('hello=C7there', 'hello\xc7there')

    def _test_decode(self, encoded, expected_decoded, eol=None):
        """Check decode(); eol=None exercises its default line ending."""
        kwargs = {} if eol is None else {'eol': eol}
        decoded = quoprimime.decode(encoded, **kwargs)
        self.assertEqual(decoded, expected_decoded)

    def test_decode_null_word(self):
        self._test_decode('', '')

    def test_decode_null_line_null_word(self):
        self._test_decode('\r\n', '\n')

    def test_decode_one_word(self):
        self._test_decode('hello', 'hello')

    def test_decode_one_word_eol(self):
        self._test_decode('hello', 'hello', eol='X')

    def test_decode_one_line(self):
        self._test_decode('hello\r\n', 'hello\n')

    def test_decode_one_line_lf(self):
        self._test_decode('hello\n', 'hello\n')

    def test_decode_one_line_cr(self):
        self._test_decode('hello\r', 'hello\n')

    def test_decode_one_line_nl(self):
        self._test_decode('hello\n', 'helloX', eol='X')

    def test_decode_one_line_crnl(self):
        self._test_decode('hello\r\n', 'helloX', eol='X')

    def test_decode_one_line_one_word(self):
        self._test_decode('hello\r\nworld', 'hello\nworld')

    def test_decode_one_line_one_word_eol(self):
        self._test_decode('hello\r\nworld', 'helloXworld', eol='X')

    def test_decode_two_lines(self):
        self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n')

    def test_decode_two_lines_eol(self):
        self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X')

    def test_decode_one_long_line(self):
        self._test_decode('Spam' * 250, 'Spam' * 250)

    def test_decode_one_space(self):
        self._test_decode(' ', '')

    def test_decode_multiple_spaces(self):
        self._test_decode(' ' * 5, '')

    def test_decode_one_line_trailing_spaces(self):
        self._test_decode('hello    \r\n', 'hello\n')

    def test_decode_two_lines_trailing_spaces(self):
        self._test_decode('hello    \r\nworld   \r\n', 'hello\nworld\n')

    def test_decode_quoted_word(self):
        self._test_decode('=22quoted=20words=22', '"quoted words"')

    def test_decode_uppercase_quoting(self):
        self._test_decode('ab=CD=EF', 'ab\xcd\xef')

    def test_decode_lowercase_quoting(self):
        self._test_decode('ab=cd=ef', 'ab\xcd\xef')

    def test_decode_soft_line_break(self):
        self._test_decode('soft line=\r\nbreak', 'soft linebreak')

    def test_decode_false_quoting(self):
        self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2')

    def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
        """Check body_encode() output and, where possible, its line lengths.

        Arguments left as None are not passed on, so body_encode()'s own
        defaults are exercised; 76 and '\\n' below mirror those defaults.
        """
        kwargs = {}
        if maxlinelen is not None:
            kwargs['maxlinelen'] = maxlinelen
        if eol is not None:
            kwargs['eol'] = eol
        encoded_body = quoprimime.body_encode(body, **kwargs)
        self.assertEqual(encoded_body, expected_encoded_body)
        effective_maxlinelen = 76 if maxlinelen is None else maxlinelen
        effective_eol = '\n' if eol is None else eol
        if effective_eol in ('\n', '\r\n'):
            # We know how to split the result back into lines, so maxlinelen
            # can be checked.
            for line in encoded_body.splitlines():
                self.assertLessEqual(len(line), effective_maxlinelen)

    def test_encode_null(self):
        self._test_encode('', '')

    def test_encode_null_lines(self):
        self._test_encode('\n\n', '\n\n')

    def test_encode_one_line(self):
        self._test_encode('hello\n', 'hello\n')

    def test_encode_one_line_crlf(self):
        self._test_encode('hello\r\n', 'hello\n')

    def test_encode_one_line_eol(self):
        self._test_encode('hello\n', 'hello\r\n', eol='\r\n')

    def test_encode_one_space(self):
        self._test_encode(' ', '=20')

    def test_encode_one_line_one_space(self):
        self._test_encode(' \n', '=20\n')

    # XXX: body_encode() expects strings, but uses ord(char) from these
    # strings to index into a 256-entry list.  For code points above 255,
    # this will fail.  Should there be a check for 8-bit only ord() values
    # in body, or at least a comment about the expected input?

    def test_encode_two_lines_one_space(self):
        self._test_encode(' \n \n', '=20\n=20\n')

    def test_encode_one_word_trailing_spaces(self):
        self._test_encode('hello   ', 'hello  =20')

    def test_encode_one_line_trailing_spaces(self):
        self._test_encode('hello   \n', 'hello  =20\n')

    def test_encode_one_word_trailing_tab(self):
        self._test_encode('hello  \t', 'hello  =09')

    def test_encode_one_line_trailing_tab(self):
        self._test_encode('hello  \t\n', 'hello  =09\n')

    def test_encode_trailing_space_before_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6)

    def test_encode_trailing_space_at_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5)

    def test_encode_trailing_space_beyond_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4)

    def test_encode_whitespace_lines(self):
        self._test_encode(' \n' * 5, '=20\n' * 5)

    def test_encode_quoted_equals(self):
        self._test_encode('a = b', 'a =3D b')

    def test_encode_one_long_string(self):
        self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25)

    def test_encode_one_long_line(self):
        self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')

    def test_encode_one_very_long_line(self):
        self._test_encode('x' * 200 + '\n',
                2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n')

    def test_encode_shortest_maxlinelen(self):
        self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4)

    def test_encode_maxlinelen_too_small(self):
        self.assertRaises(ValueError, self._test_encode, '', '', maxlinelen=3)

    def test_encode(self):
        eq = self.assertEqual
        eq(quoprimime.body_encode(''), '')
        eq(quoprimime.body_encode('hello'), 'hello')
        # CRLF in the input is normalized to the default eol.
        eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
        # Test the maxlinelen arg
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
x xxxx xxxx xxxx xxxx=20""")
        # Test the eol argument
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
x xxxx xxxx xxxx xxxx=20""")
        eq(quoprimime.body_encode("""\
one line

two line"""), """\
one line

two line""")
3686
3687
Ezio Melottib3aedd42010-11-20 19:04:17 +00003688
# Test the Charset class
class TestCharset(unittest.TestCase):
    """Tests for email.charset.Charset header and body encoding."""

    def tearDown(self):
        # Drop the 'fake' charset that test_body_encode registers, if any.
        from email import charset as CharsetModule
        CharsetModule.CHARSETS.pop('fake', None)

    def test_codec_encodeable(self):
        eq = self.assertEqual
        # Make sure us-ascii = no Unicode conversion
        ascii_charset = Charset('us-ascii')
        eq(ascii_charset.header_encode('Hello World!'), 'Hello World!')
        # Non-ASCII text cannot be encoded as us-ascii ...
        non_ascii = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
        self.assertRaises(UnicodeError, ascii_charset.header_encode, non_ascii)
        # ... but utf-8 handles it.
        utf8_charset = Charset('utf-8')
        eq(utf8_charset.header_encode(non_ascii),
           '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')

    def test_body_encode(self):
        eq = self.assertEqual
        # Try a charset with QP body encoding
        qp_charset = Charset('iso-8859-1')
        eq('hello w=F6rld', qp_charset.body_encode('hello w\xf6rld'))
        # Try a charset with Base64 body encoding
        base64_charset = Charset('utf-8')
        eq('aGVsbG8gd29ybGQ=\n', base64_charset.body_encode(b'hello world'))
        # Try a charset with None body encoding
        plain_charset = Charset('us-ascii')
        eq('hello world', plain_charset.body_encode('hello world'))
        # Try the convert argument, where input codec != output codec
        c = Charset('euc-jp')
        # With apologies to Tokio Kikuchi ;)
        # XXX FIXME
##         try:
##             eq('\x1b$B5FCO;~IW\x1b(B',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
##             eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
##         except LookupError:
##             # We probably don't have the Japanese codecs installed
##             pass
        # Testing SF bug #625509, which we have to fake, since there are no
        # built-in encodings where the header encoding is QP but the body
        # encoding is not.
        from email import charset as CharsetModule
        CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8')
        fake_charset = Charset('fake')
        eq('hello world', fake_charset.body_encode('hello world'))

    def test_unicode_charset_name(self):
        ascii_charset = Charset('us-ascii')
        self.assertEqual(str(ascii_charset), 'us-ascii')
        # A charset name with a non-ASCII character is rejected.
        self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
3744
3745
Ezio Melottib3aedd42010-11-20 19:04:17 +00003746
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003747# Test multilingual MIME headers.
3748class TestHeader(TestEmailBase):
def test_simple(self):
    """Appending an ASCII chunk with a leading space extends the header."""
    check = self.ndiffAssertEqual
    header = Header('Hello World!')
    check(header.encode(), 'Hello World!')
    header.append(' Goodbye World!')
    check(header.encode(), 'Hello World!  Goodbye World!')
3755
def test_simple_surprise(self):
    """A chunk appended without a leading space is still space-separated."""
    check = self.ndiffAssertEqual
    header = Header('Hello World!')
    check(header.encode(), 'Hello World!')
    header.append('Goodbye World!')
    check(header.encode(), 'Hello World! Goodbye World!')
3762
3763 def test_header_needs_no_decoding(self):
3764 h = 'no decoding needed'
3765 self.assertEqual(decode_header(h), [(h, None)])
3766
3767 def test_long(self):
3768 h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
3769 maxlinelen=76)
3770 for l in h.encode(splitchars=' ').split('\n '):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003771 self.assertTrue(len(l) <= 76)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003772
def test_multilingual(self):
    """A header built from three charsets encodes, decodes and round-trips."""
    eq = self.ndiffAssertEqual
    latin1 = Charset("iso-8859-1")
    latin2 = Charset("iso-8859-2")
    utf8 = Charset("utf-8")
    g_head = (b'Die Mieter treten hier ein werden mit einem '
              b'Foerderband komfortabel den Korridor entlang, '
              b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
              b'gegen die rotierenden Klingen bef\xf6rdert. ')
    cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
               b'd\xf9vtipu.. ')
    utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                 '\u3044\u307e\u3059\u3002')
    h = Header(g_head, latin1)
    h.append(cz_head, latin2)
    h.append(utf8_head, utf8)
    enc = h.encode(maxlinelen=76)
    # Every folded continuation line starts with exactly one space.
    expected_lines = (
        '=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=',
        ' =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=',
        ' =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=',
        ' =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=',
        ' =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=',
        ' =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=',
        ' =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=',
        ' =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=',
        ' =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=',
        ' =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=',
        ' =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=',
    )
    eq(enc, '\n'.join(expected_lines))
    decoded = decode_header(enc)
    eq(len(decoded), 3)
    eq(decoded[0], (g_head, 'iso-8859-1'))
    eq(decoded[1], (cz_head, 'iso-8859-2'))
    eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
    # str() gives the whole header as one unicode string.
    expected_unicode = (
        b'Die Mieter treten hier ein werden mit einem Foerderband '
        b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
        b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
        b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
        b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
        b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
        b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
        b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
        b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
        b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
        b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
        b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
        b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
        b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
        b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
        b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
        ).decode('utf-8')
    eq(str(h), expected_unicode)
    # Test make_header()
    rebuilt = make_header(decode_header(enc))
    eq(rebuilt, h)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003835
3836 def test_empty_header_encode(self):
3837 h = Header()
3838 self.assertEqual(h.encode(), '')
Barry Warsaw8b3d6592007-08-30 02:10:49 +00003839
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003840 def test_header_ctor_default_args(self):
3841 eq = self.ndiffAssertEqual
3842 h = Header()
3843 eq(h, '')
3844 h.append('foo', Charset('iso-8859-1'))
Guido van Rossum9604e662007-08-30 03:46:43 +00003845 eq(h, 'foo')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003846
3847 def test_explicit_maxlinelen(self):
3848 eq = self.ndiffAssertEqual
3849 hstr = ('A very long line that must get split to something other '
3850 'than at the 76th character boundary to test the non-default '
3851 'behavior')
3852 h = Header(hstr)
3853 eq(h.encode(), '''\
3854A very long line that must get split to something other than at the 76th
3855 character boundary to test the non-default behavior''')
3856 eq(str(h), hstr)
3857 h = Header(hstr, header_name='Subject')
3858 eq(h.encode(), '''\
3859A very long line that must get split to something other than at the
3860 76th character boundary to test the non-default behavior''')
3861 eq(str(h), hstr)
3862 h = Header(hstr, maxlinelen=1024, header_name='Subject')
3863 eq(h.encode(), hstr)
3864 eq(str(h), hstr)
3865
Guido van Rossum9604e662007-08-30 03:46:43 +00003866 def test_quopri_splittable(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003867 eq = self.ndiffAssertEqual
3868 h = Header(charset='iso-8859-1', maxlinelen=20)
Guido van Rossum9604e662007-08-30 03:46:43 +00003869 x = 'xxxx ' * 20
3870 h.append(x)
3871 s = h.encode()
3872 eq(s, """\
3873=?iso-8859-1?q?xxx?=
3874 =?iso-8859-1?q?x_?=
3875 =?iso-8859-1?q?xx?=
3876 =?iso-8859-1?q?xx?=
3877 =?iso-8859-1?q?_x?=
3878 =?iso-8859-1?q?xx?=
3879 =?iso-8859-1?q?x_?=
3880 =?iso-8859-1?q?xx?=
3881 =?iso-8859-1?q?xx?=
3882 =?iso-8859-1?q?_x?=
3883 =?iso-8859-1?q?xx?=
3884 =?iso-8859-1?q?x_?=
3885 =?iso-8859-1?q?xx?=
3886 =?iso-8859-1?q?xx?=
3887 =?iso-8859-1?q?_x?=
3888 =?iso-8859-1?q?xx?=
3889 =?iso-8859-1?q?x_?=
3890 =?iso-8859-1?q?xx?=
3891 =?iso-8859-1?q?xx?=
3892 =?iso-8859-1?q?_x?=
3893 =?iso-8859-1?q?xx?=
3894 =?iso-8859-1?q?x_?=
3895 =?iso-8859-1?q?xx?=
3896 =?iso-8859-1?q?xx?=
3897 =?iso-8859-1?q?_x?=
3898 =?iso-8859-1?q?xx?=
3899 =?iso-8859-1?q?x_?=
3900 =?iso-8859-1?q?xx?=
3901 =?iso-8859-1?q?xx?=
3902 =?iso-8859-1?q?_x?=
3903 =?iso-8859-1?q?xx?=
3904 =?iso-8859-1?q?x_?=
3905 =?iso-8859-1?q?xx?=
3906 =?iso-8859-1?q?xx?=
3907 =?iso-8859-1?q?_x?=
3908 =?iso-8859-1?q?xx?=
3909 =?iso-8859-1?q?x_?=
3910 =?iso-8859-1?q?xx?=
3911 =?iso-8859-1?q?xx?=
3912 =?iso-8859-1?q?_x?=
3913 =?iso-8859-1?q?xx?=
3914 =?iso-8859-1?q?x_?=
3915 =?iso-8859-1?q?xx?=
3916 =?iso-8859-1?q?xx?=
3917 =?iso-8859-1?q?_x?=
3918 =?iso-8859-1?q?xx?=
3919 =?iso-8859-1?q?x_?=
3920 =?iso-8859-1?q?xx?=
3921 =?iso-8859-1?q?xx?=
3922 =?iso-8859-1?q?_?=""")
3923 eq(x, str(make_header(decode_header(s))))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003924 h = Header(charset='iso-8859-1', maxlinelen=40)
3925 h.append('xxxx ' * 20)
Guido van Rossum9604e662007-08-30 03:46:43 +00003926 s = h.encode()
3927 eq(s, """\
3928=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
3929 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
3930 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
3931 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
3932 =?iso-8859-1?q?_xxxx_xxxx_?=""")
3933 eq(x, str(make_header(decode_header(s))))
3934
3935 def test_base64_splittable(self):
3936 eq = self.ndiffAssertEqual
3937 h = Header(charset='koi8-r', maxlinelen=20)
3938 x = 'xxxx ' * 20
3939 h.append(x)
3940 s = h.encode()
3941 eq(s, """\
3942=?koi8-r?b?eHh4?=
3943 =?koi8-r?b?eCB4?=
3944 =?koi8-r?b?eHh4?=
3945 =?koi8-r?b?IHh4?=
3946 =?koi8-r?b?eHgg?=
3947 =?koi8-r?b?eHh4?=
3948 =?koi8-r?b?eCB4?=
3949 =?koi8-r?b?eHh4?=
3950 =?koi8-r?b?IHh4?=
3951 =?koi8-r?b?eHgg?=
3952 =?koi8-r?b?eHh4?=
3953 =?koi8-r?b?eCB4?=
3954 =?koi8-r?b?eHh4?=
3955 =?koi8-r?b?IHh4?=
3956 =?koi8-r?b?eHgg?=
3957 =?koi8-r?b?eHh4?=
3958 =?koi8-r?b?eCB4?=
3959 =?koi8-r?b?eHh4?=
3960 =?koi8-r?b?IHh4?=
3961 =?koi8-r?b?eHgg?=
3962 =?koi8-r?b?eHh4?=
3963 =?koi8-r?b?eCB4?=
3964 =?koi8-r?b?eHh4?=
3965 =?koi8-r?b?IHh4?=
3966 =?koi8-r?b?eHgg?=
3967 =?koi8-r?b?eHh4?=
3968 =?koi8-r?b?eCB4?=
3969 =?koi8-r?b?eHh4?=
3970 =?koi8-r?b?IHh4?=
3971 =?koi8-r?b?eHgg?=
3972 =?koi8-r?b?eHh4?=
3973 =?koi8-r?b?eCB4?=
3974 =?koi8-r?b?eHh4?=
3975 =?koi8-r?b?IA==?=""")
3976 eq(x, str(make_header(decode_header(s))))
3977 h = Header(charset='koi8-r', maxlinelen=40)
3978 h.append(x)
3979 s = h.encode()
3980 eq(s, """\
3981=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
3982 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
3983 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
3984 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
3985 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
3986 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
3987 eq(x, str(make_header(decode_header(s))))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003988
3989 def test_us_ascii_header(self):
3990 eq = self.assertEqual
3991 s = 'hello'
3992 x = decode_header(s)
3993 eq(x, [('hello', None)])
3994 h = make_header(x)
3995 eq(s, h.encode())
3996
3997 def test_string_charset(self):
3998 eq = self.assertEqual
3999 h = Header()
4000 h.append('hello', 'iso-8859-1')
Guido van Rossum9604e662007-08-30 03:46:43 +00004001 eq(h, 'hello')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004002
4003## def test_unicode_error(self):
4004## raises = self.assertRaises
4005## raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
4006## raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
4007## h = Header()
4008## raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
4009## raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
4010## raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')
4011
4012 def test_utf8_shortest(self):
4013 eq = self.assertEqual
4014 h = Header('p\xf6stal', 'utf-8')
4015 eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
4016 h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
4017 eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')
4018
4019 def test_bad_8bit_header(self):
4020 raises = self.assertRaises
4021 eq = self.assertEqual
4022 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
4023 raises(UnicodeError, Header, x)
4024 h = Header()
4025 raises(UnicodeError, h.append, x)
4026 e = x.decode('utf-8', 'replace')
4027 eq(str(Header(x, errors='replace')), e)
4028 h.append(x, errors='replace')
4029 eq(str(h), e)
4030
def test_escaped_8bit_header(self):
    # The undecodable byte is carried as a surrogate escape; str() must
    # show it as U+FFFD, while decode_header() must hand back the escaped
    # text tagged 'unknown-8bit'.
    raw = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
    escaped = raw.decode('ascii', 'surrogateescape')
    header = Header(escaped, charset=email.charset.UNKNOWN8BIT)
    self.assertEqual(
        str(header),
        'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
    self.assertEqual(email.header.decode_header(header),
                     [(escaped, 'unknown-8bit')])
4038
4039 def test_modify_returned_list_does_not_change_header(self):
4040 h = Header('test')
4041 chunks = email.header.decode_header(h)
4042 chunks.append(('ascii', 'test2'))
4043 self.assertEqual(str(h), 'test')
4044
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004045 def test_encoded_adjacent_nonencoded(self):
4046 eq = self.assertEqual
4047 h = Header()
4048 h.append('hello', 'iso-8859-1')
4049 h.append('world')
4050 s = h.encode()
4051 eq(s, '=?iso-8859-1?q?hello?= world')
4052 h = make_header(decode_header(s))
4053 eq(h.encode(), s)
4054
def test_whitespace_eater(self):
    # Two adjacent koi8-r encoded-words separated only by a space are
    # expected to decode into one chunk, and to re-encode as a single
    # encoded-word.
    check = self.assertEqual
    raw = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
    expected_parts = [
        (b'Subject:', None),
        (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 '
         b'\xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'),
        (b'zz.', None),
    ]
    parts = decode_header(raw)
    check(parts, expected_parts)
    rebuilt = make_header(parts)
    check(rebuilt.encode(),
          'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')
4063
4064 def test_broken_base64_header(self):
4065 raises = self.assertRaises
R. David Murrayc4e69cc2010-08-03 22:14:10 +00004066 s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004067 raises(errors.HeaderParseError, decode_header, s)
4068
R. David Murray477efb32011-01-05 01:39:32 +00004069 def test_shift_jis_charset(self):
4070 h = Header('文', charset='shift_jis')
4071 self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')
4072
R David Murrayde912762011-03-16 18:26:23 -04004073 def test_flatten_header_with_no_value(self):
4074 # Issue 11401 (regression from email 4.x) Note that the space after
4075 # the header doesn't reflect the input, but this is also the way
4076 # email 4.x behaved. At some point it would be nice to fix that.
4077 msg = email.message_from_string("EmptyHeader:")
4078 self.assertEqual(str(msg), "EmptyHeader: \n\n")
4079
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004080
Ezio Melottib3aedd42010-11-20 19:04:17 +00004081
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004082# Test RFC 2231 header parameters (en/de)coding
class TestRFC2231(TestEmailBase):
    # Tests for RFC 2231 parameter values: extended ("name*=") values
    # carrying charset'language'text, and continuations ("name*0", "name*1",
    # ...).  get_param() is expected to return a (charset, language, value)
    # tuple for extended values and a plain string otherwise.

    def test_get_param(self):
        # msg_29.txt supplies the 'title' parameter read here.
        eq = self.assertEqual
        msg = self._msgobj('msg_29.txt')
        eq(msg.get_param('title'),
           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        eq(msg.get_param('title', unquote=False),
           ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))

    def test_set_param(self):
        eq = self.ndiffAssertEqual
        msg = Message()
        # Without a language the middle tuple element is the empty string.
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii')
        eq(msg.get_param('title'),
           ('us-ascii', '', 'This is even more ***fun*** isn\'t it!'))
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        eq(msg.get_param('title'),
           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        # Setting the parameter on a parsed message must show up, percent-
        # encoded, on a folded Content-Type line when flattened.
        # NOTE(review): the expected text has to match the flattened
        # msg_01.txt byte-for-byte, including any runs of spaces inside
        # header values -- confirm against the data file.
        msg = self._msgobj('msg_01.txt')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    def test_del_param(self):
        # Two parameters are added and one removed again; only 'title'
        # must remain on the flattened Content-Type header.
        # NOTE(review): the expected text has to match the flattened
        # msg_01.txt byte-for-byte, including any runs of spaces inside
        # header values -- confirm against the data file.
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        msg.set_param('foo', 'bar', charset='us-ascii', language='en')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        msg.del_param('foo', header='Content-Type')
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    def test_rfc2231_get_content_charset(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_32.txt')
        eq(msg.get_content_charset(), 'us-ascii')

    def test_rfc2231_parse_rfc_quoting(self):
        # Unquoted extended segments: the filename must decode and the
        # message must flatten back to exactly the input text.
        m = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*=''This%20is%20even%20more%20;
            \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        self.assertEqual(m, msg.as_string())

    def test_rfc2231_parse_extra_quoting(self):
        # Same as above, but the extended segments are wrapped in double
        # quotes; the result must be identical.
        m = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*="''This%20is%20even%20more%20";
            \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        self.assertEqual(m, msg.as_string())

    def test_rfc2231_no_language_or_charset(self):
        # Plain (non-extended) continuations must come back as one joined
        # string, not as a (charset, language, value) tuple.
        m = '''\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm

'''
        msg = email.message_from_string(m)
        param = msg.get_param('NAME')
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(
            param,
            'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')

    def test_rfc2231_no_language_or_charset_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
        # NOTE(review): the body of this test is identical to
        # test_rfc2231_no_language_or_charset_in_filename -- confirm
        # whether a different input was intended.
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_partly_encoded(self):
        # Segment 0 is not marked extended, so its %20 sequences are
        # expected to stay literal while segment 1 is percent-decoded.
        m = '''\
Content-Disposition: inline;
\tfilename*0="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20***fun*** is it not.pdf')

    def test_rfc2231_partly_nonencoded(self):
        # No segment is marked extended, so nothing is percent-decoded.
        m = '''\
Content-Disposition: inline;
\tfilename*0="This%20is%20even%20more%20";
\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_boundary(self):
        m = '''\
Content-Type: multipart/alternative;
\tboundary*0*="''This%20is%20even%20more%20";
\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tboundary*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_boundary(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_charset(self):
        # This is a nonsensical charset value, but tests the code anyway
        m = '''\
Content-Type: text/plain;
\tcharset*0*="This%20is%20even%20more%20";
\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tcharset*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        # The expected value is the lower-cased form of the joined text.
        self.assertEqual(msg.get_content_charset(),
                         'this is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_filename(self):
        # The declared charset 'bogus' does not exist; the filename is
        # still expected to be returned decoded.
        m = '''\
Content-Disposition: inline;
\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_charset(self):
        m = """\
Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D

"""
        msg = email.message_from_string(m)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertEqual(msg.get_content_charset(), None)

    def test_rfc2231_bad_character_in_charset(self):
        m = """\
Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D

"""
        msg = email.message_from_string(m)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertEqual(msg.get_content_charset(), None)

    def test_rfc2231_bad_character_in_filename(self):
        # The trailing %E2 cannot be decoded as ascii and is expected to
        # come back as U+FFFD.
        m = '''\
Content-Disposition: inline;
\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2*="is it not.pdf%E2"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf\ufffd')

    def test_rfc2231_unknown_encoding(self):
        m = """\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt

"""
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(), 'myfile.txt')

    def test_rfc2231_single_tick_in_filename_extended(self):
        # A lone apostrophe in an extended value must not be taken for a
        # charset/language delimiter: both come back as None.
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, None)
        eq(language, None)
        eq(s, "Frank's Document")

    def test_rfc2231_single_tick_in_filename(self):
        m = """\
Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(m)
        param = msg.get_param('name')
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(param, "Frank's Document")

    def test_rfc2231_tick_attack_extended(self):
        # With extended segments the first two apostrophes delimit charset
        # and language; the third belongs to the value.
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, "Frank's Document")

    def test_rfc2231_tick_attack(self):
        # Without the extended marker the same text is just a string: the
        # apostrophes are kept and no tuple is returned.
        m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(m)
        param = msg.get_param('name')
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(param, "us-ascii'en-us'Frank's Document")

    def test_rfc2231_no_extended_values(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo; name=\"Frank's Document\"

"""
        msg = email.message_from_string(m)
        eq(msg.get_param('name'), "Frank's Document")

    def test_rfc2231_encoded_then_unencoded_segments(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'My\";
\tname*1=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, 'My Document For You')

    def test_rfc2231_unencoded_then_encoded_segments(self):
        # Segment 0 is not marked extended here, yet charset and language
        # are still expected to be split off its value.
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'My\";
\tname*1*=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, 'My Document For You')
4417
4418
Ezio Melottib3aedd42010-11-20 19:04:17 +00004419
R. David Murraya8f480f2010-01-16 18:30:03 +00004420# Tests to ensure that signed parts of an email are completely preserved, as
4421# required by RFC1847 section 2.1. Note that these are incomplete, because the
4422# email package does not currently always preserve the body. See issue 1670765.
class TestSigned(TestEmailBase):
    # Each test parses msg_45.txt, regenerates it one way, and checks that
    # the first MIME part of the output equals the first MIME part of the
    # input text.

    def _msg_and_obj(self, filename):
        # Return (raw text of the data file, Message parsed from that text).
        with openfile(filename) as fp:
            original = fp.read()
        msg = email.message_from_string(original)
        return original, msg

    def _signed_parts_eq(self, original, result):
        # Extract the first mime part of each message.  The pattern matches
        # a "--boundary" line, then lazily everything up to the next line
        # that starts with the same "--boundary" text.
        repart = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)
        inmatch = repart.search(original)
        outmatch = repart.search(result)
        # Report a missing part as a test failure instead of letting
        # .group() blow up on None.
        self.assertIsNotNone(inmatch, 'no MIME part found in original text')
        self.assertIsNotNone(outmatch, 'no MIME part found in generated text')
        self.assertEqual(outmatch.group(2), inmatch.group(2))

    def test_long_headers_as_string(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        # Same check with an explicit maxheaderlen of 60.
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        # Same check, going through Generator.flatten() into a StringIO.
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)
4455
4456
Ezio Melottib3aedd42010-11-20 19:04:17 +00004457
if __name__ == '__main__':
    # Executed as a script: hand control to unittest's command-line runner.
    unittest.main()